komstat uts

# -----------------------------------
# Metode Bootstrep (Soal No. 1)
# -----------------------------------
set.seed(123)             
library(boot)            
library(broom)           

# Data
calorie <- c(2475,2235,2665,2975,2750,2453,3455,2365,2445,2925)
bp      <- c(110,110,120,120,130,110,130,120,110,120)
chol    <- c(80,80,90,80,90,80,100,90,80,90)
data.df <- data.frame(chol=chol, calorie=calorie, bp=bp)

# ---------- OLS (baseline) ----------
fit <- lm(chol ~ calorie + bp, data = data.df)
summary(fit)

## 
## Call:
## lm(formula = chol ~ calorie + bp, data = data.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9867 -0.1540  0.1186  2.5290  4.2342 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.500482  22.487207   0.022   0.9829  
## calorie      0.002105   0.005791   0.363   0.7270  
## bp           0.676864   0.267922   2.526   0.0394 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.186 on 7 degrees of freedom
## Multiple R-squared:  0.7212, Adjusted R-squared:  0.6415 
## F-statistic: 9.053 on 2 and 7 DF,  p-value: 0.01145

# ---------- Fungsi statistik untuk bootstrap ----------
# fungsi index (pairs bootstrap)
boot_coef <- function(data, indices) {
  d <- data[indices, ]           # resample baris (pairs)
  fitb <- lm(chol ~ calorie + bp, data = d)
  return(coef(fitb))             # mengembalikan vector (Intercept, calorie, bp)
}

R <- 10000
b_out <- boot(data.df, statistic = boot_coef, R = R)

# ringkasan bootstrap
print(b_out)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data.df, statistic = boot_coef, R = R)
## 
## 
## Bootstrap Statistics :
##        original       bias    std. error
## t1* 0.500481570 -7.883735212 63.77715053
## t2* 0.002105074 -0.003994032  0.02045498
## t3* 0.676863732  0.159210859  1.00196465

# Estimasi bootstrap: mean, se (bootstrap sd), 95% percentile CI
boot_estimates <- apply(b_out$t, 2, mean)
boot_se        <- apply(b_out$t, 2, sd)
boot_ci_low    <- apply(b_out$t, 2, function(x) quantile(x, 0.025))
boot_ci_high   <- apply(b_out$t, 2, function(x) quantile(x, 0.975))

result_table <- data.frame(
  term = c("(Intercept)","calorie","bp"),
  ols_est = coef(fit),
  ols_se  = coef(summary(fit))[, "Std. Error"],
  boot_mean = boot_estimates,
  boot_se   = boot_se,
  ci_2.5    = boot_ci_low,
  ci_97.5   = boot_ci_high
)
print(result_table)

##                    term     ols_est       ols_se    boot_mean     boot_se
## (Intercept) (Intercept) 0.500481570 22.487206557 -7.383253642 63.77715053
## calorie         calorie 0.002105074  0.005791392 -0.001888958  0.02045498
## bp                   bp 0.676863732  0.267922215  0.836074591  1.00196465
##                   ci_2.5     ci_97.5
## (Intercept) -65.34679575 32.94971584
## calorie      -0.01698065  0.01267471
## bp            0.29545994  1.59052400

# ---------- Uji signifikansi berbasis bootstrap (approx) ----------
# p-value approx two-sided: prop(bootstrap coef <= 0) * 2 atau prop >=0 *2 (whichever smaller*2)
boot_pvals <- sapply(1:ncol(b_out$t), function(j) {
  prop_le0 <- mean(b_out$t[,j] <= 0)
  prop_ge0 <- mean(b_out$t[,j] >= 0)
  p <- 2*min(prop_le0, prop_ge0)
  return(p)
})
data.frame(term=c("(Intercept)","calorie","bp"), boot_pvalue = boot_pvals)

##          term boot_pvalue
## 1 (Intercept)      0.8730
## 2     calorie      0.9982
## 3          bp      0.0034

# -----------------------------------
# Metode EM (Soal No. 3)
# -----------------------------------

# data
n  <- 197
y1 <- 125
y3 <- 18
y4 <- 20
y5 <- 34

# fungsi update EM
em_update <- function(pi_old) {
  (4/n) * ((y1 * pi_old / (2 + pi_old)) + y5)
}

# iterasi EM
pi_est <- 0.5   # tebakan awal
tol <- 1e-6
maxit <- 1000

for (t in 1:maxit) {
  pi_new <- em_update(pi_est)
  if (abs(pi_new - pi_est) < tol) {
    cat("Konvergen pada iterasi", t, "dengan pi =", pi_new, "\n")
    break
  }
  pi_est <- pi_new
}

## Konvergen pada iterasi 15 dengan pi = 1.940097

# hasil akhir
pi_hat <- pi_new
pi_hat

## [1] 1.940097

# ----------------------------
# Jackknife delete-d (Soal No.4)
# ----------------------------

set.seed(123)   # reproducible

# 1) Bangkitkan data
N <- 10000
mu <- 50
sigma <- 10
x <- rnorm(N, mean = mu, sd = sigma)

# 2) Parameter jackknife
d <- 100
g <- N / d   # harus bulat (100)

# 3) Bagi data jadi blok
perm <- sample.int(N)
blocks <- split(perm, rep(1:g, each = d))

# 4) Estimator penuh
mean_full <- mean(x)
var_full  <- var(x)

# 5) Leave-one-block-out estimators
mean_leave <- numeric(g)
var_leave  <- numeric(g)

for (i in 1:g) {
  idx_keep <- setdiff(1:N, blocks[[i]])
  xi <- x[idx_keep]
  mean_leave[i] <- mean(xi)
  var_leave[i]  <- var(xi)
}

# ---- Fungsi bantu jackknife ----
jackknife <- function(full, leave){
  g <- length(leave)
  theta_bar <- mean(leave)
  bias <- (g-1) * (theta_bar - full)
  theta_jack <- full - bias
  varjack <- (g-1)/g * sum((leave - theta_bar)^2)
  se_jack <- sqrt(varjack)
  ci <- theta_jack + c(-1,1) * 1.96 * se_jack
  list(full=full, bias=bias, jack=theta_jack,
       se=se_jack, ci=ci)
}

# 6) Hitung hasil untuk mean & variance
res_mean <- jackknife(mean_full, mean_leave)
res_var  <- jackknife(var_full, var_leave)

# 7) Cetak hasil
cat("=== Mean ===\n")

## === Mean ===

print(res_mean)

## $full
## [1] 49.97628
## 
## $bias
## [1] 0
## 
## $jack
## [1] 49.97628
## 
## $se
## [1] 0.1131254
## 
## $ci
## [1] 49.75456 50.19801

cat("\n=== Variance ===\n")

## 
## === Variance ===

print(res_var)

## $full
## [1] 99.72751
## 
## $bias
## [1] -0.002824881
## 
## $jack
## [1] 99.73034
## 
## $se
## [1] 1.546977
## 
## $ci
## [1]  96.69826 102.76241

# -----------------------------------
# Fixed Point Iteration + Plot lintasan
# f(x) = exp(-x) - x^2/2. (Soal No. 5)
# -----------------------------------

g <- function(x) -exp(-x)

fixed_point_iter <- function(x0, tol=1e-6, maxit=50) {
  x <- x0
  cat("Iterasi ke-0:", x, "\n")
  for (i in 1:maxit) {
    x_new <- g(x)
    
    # cek apakah hasil valid
    if (!is.finite(x_new)) {
      cat("Iterasi ke-", i, ": nilai tidak valid (NA/Inf), stop.\n")
      return(NA)
    }
    
    cat("Iterasi ke-", i, ": ", x_new, "\n")
    
    # cek konvergensi
    if (abs(x_new - x) < tol) {
      cat("Konvergen pada iterasi ke-", i, 
          " dengan x =", x_new, "\n")
      return(x_new)
    }
    
    # update
    x <- x_new
  }
  cat("Tidak konvergen setelah", maxit, "iterasi.\n")
  return(NA)
}

# Contoh uji dengan berbagai titik awal
cat("Mulai dari x0 = 0\n")

## Mulai dari x0 = 0

fixed_point_iter(0)

## Iterasi ke-0: 0 
## Iterasi ke- 1 :  -1 
## Iterasi ke- 2 :  -2.718282 
## Iterasi ke- 3 :  -15.15426 
## Iterasi ke- 4 :  -3814279 
## Iterasi ke- 5 : nilai tidak valid (NA/Inf), stop.

## [1] NA

cat("\nMulai dari x0 = -1\n")

## 
## Mulai dari x0 = -1

fixed_point_iter(-1)

## Iterasi ke-0: -1 
## Iterasi ke- 1 :  -2.718282 
## Iterasi ke- 2 :  -15.15426 
## Iterasi ke- 3 :  -3814279 
## Iterasi ke- 4 : nilai tidak valid (NA/Inf), stop.

## [1] NA

cat("\nMulai dari x0 = 2\n")

## 
## Mulai dari x0 = 2

fixed_point_iter(2)

## Iterasi ke-0: 2 
## Iterasi ke- 1 :  -0.1353353 
## Iterasi ke- 2 :  -1.144921 
## Iterasi ke- 3 :  -3.142192 
## Iterasi ke- 4 :  -23.15456 
## Iterasi ke- 5 :  -11373617267 
## Iterasi ke- 6 : nilai tidak valid (NA/Inf), stop.

## [1] NA

g <- function(x) -exp(-x)

fpi_path <- function(x0, maxit=20) {
  x_vals <- numeric(maxit+1)
  x_vals[1] <- x0
  for (i in 1:maxit) {
    x_new <- g(x_vals[i])
    if (!is.finite(x_new)) {
      x_vals[(i+1):(maxit+1)] <- NA
      break
    }
    x_vals[i+1] <- x_new
  }
  return(x_vals)
}

# Contoh lintasan dari beberapa titik awal
path0 <- fpi_path(0, maxit=20)
path1 <- fpi_path(-1, maxit=20)
path2 <- fpi_path(2, maxit=20)

# Plot lintasan iterasi
plot(path0, type="b", pch=19, col="blue",
     ylim=range(c(path0, path1, path2), na.rm=TRUE),
     xlab="Iterasi", ylab="x_n",
     main="Lintasan Fixed-Point Iteration (x_{n+1} = -exp(-x_n))")
lines(path1, type="b", pch=17, col="red")
lines(path2, type="b", pch=15, col="darkgreen")
legend("topright", legend=c("x0=0","x0=-1","x0=2"),
       col=c("blue","red","darkgreen"),
       pch=c(19,17,15), lty=1)

komstat uts

Linda Apriliana

2025-09-26