Machine Learningt

Tugas dan Latihan

1 Materi 2

1.1 Latihan 2.11

  1. Ubah fungsi sebenarnya menjadi 𝑓(𝑥)=sin(2𝜋𝑥) dan ulangi analisis. Bagaimana berubah bias–variance?
  2. Ubah ukuran sampel n dari 40 menjadi 200. Apa yang terjadi pada variance model fleksibel?
  3. Coba beberapa nilai lambda pada ridge. Kapan model mulai underfit?
  4. (Opsional) Coba deg=5 vs deg=15. Jelaskan perubahan kompleksitas.

1.1.1 Soal 1

1. Simulasi Data Mengubah fungsi menjadi f(x)=(2x)

set.seed(123)

f_true <- function(x) sin(2*pi*x)

sigma <- 0.8
n <- 40

x_grid <- seq(-2,2,length.out=120)
f_grid <- f_true(x_grid)

x <- runif(n,-2,2)
y <- f_true(x) + rnorm(n,0,sigma)

plot(x,y,pch=16,main="Training sample: Y = sin(2πx) + noise")
lines(x_grid,f_grid,lwd=2)

2. Fit 2 model

# model sederhana
m_lm <- lm(y ~ x)

# model fleksibel
deg <- 15
m_poly <- lm(y ~ poly(x, deg, raw = TRUE))
plot(x, y, pch=16, main="Fit pada satu dataset")
lines(x_grid, f_grid, lwd=2)

lines(x_grid,
      predict(m_lm, newdata=data.frame(x=x_grid)),
      lwd=2, lty=2)

lines(x_grid,
      predict(m_poly, newdata=data.frame(x=x_grid)),
      lwd=2, lty=3)

legend("topleft",
       legend=c("f(x) true",
                "Linear (OLS)",
                paste0("Polynomial deg=",deg)),
       lty=c(1,2,3),
       bty="n")

Interpretasi:

• Model linear tidak mampu mengikuti pola sinus → bias tinggi • Model polynomial lebih fleksibel → bias lebih kecil tetapi berpotensi variance tinggi

3.Simulasi bias–variance

B <- 300

pred_lm <- matrix(NA_real_,B,length(x_grid))
pred_poly <- matrix(NA_real_,B,length(x_grid))
set.seed(2026)

for(b in 1:B){

  x_b <- runif(n,-2,2)
  y_b <- f_true(x_b) + rnorm(n,0,sigma)

  fit_lm <- lm(y_b ~ x_b)
  fit_poly <- lm(y_b ~ poly(x_b,deg,raw=TRUE))

  pred_lm[b,] <- predict(fit_lm,
                         newdata=data.frame(x_b=x_grid))

  pred_poly[b,] <- predict(fit_poly,
                           newdata=data.frame(x_b=x_grid))
}
bv_summary <- function(pred_mat,f_grid,sigma2){

  mu_hat <- colMeans(pred_mat)

  bias2 <- (mu_hat - f_grid)^2

  varhat <- apply(pred_mat,2,var)

  msehat <- bias2 + varhat + sigma2

  list(
    mu_hat = mu_hat,
    bias2 = bias2,
    variance = varhat,
    msehat = msehat
  )
}

s_lm <- bv_summary(pred_lm,f_grid,sigma^2)
s_poly <- bv_summary(pred_poly,f_grid,sigma^2)
par(mfrow=c(2,2),mar=c(4,4,2,1))

plot(x_grid,s_lm$bias2,type="l",lwd=2,
     main="Bias^2 vs x (Linear)",
     ylab="Bias^2",xlab="x")

plot(x_grid,s_lm$variance,type="l",lwd=2,
     main="Variance vs x (Linear)",
     ylab="Variance",xlab="x")

plot(x_grid,s_poly$bias2,type="l",lwd=2,
     main=paste0("Bias^2 vs x (Poly ",deg,")"),
     ylab="Bias^2",xlab="x")

plot(x_grid,s_poly$variance,type="l",lwd=2,
     main=paste0("Variance vs x (Poly ",deg,")"),
     ylab="Variance",xlab="x")

4.Ringkasan

avg_metrics <- function(s) {
  c(
    Bias2 = mean(s$bias2),
    Variance = mean(s$variance),
    Irreducible = sigma^2,
    MSE = mean(s$msehat)
  )
}

rbind(
  Linear_OLS = avg_metrics(s_lm),
  Poly_high_degree = avg_metrics(s_poly)
)
##                        Bias2     Variance Irreducible          MSE
## Linear_OLS         0.4781825 5.881990e-02        0.64 1.177002e+00
## Poly_high_degree 578.6735751 1.912306e+05        0.64 1.918099e+05

Interpretasi

Ketika fungsi sebenarnya menjadi sinus:

• model linear memiliki bias besar

• model polynomial memiliki bias lebih kecil tetapi variance tinggi

Ini memperjelas bias–variance tradeoff.

1.1.2 Soal 2

set.seed(123)

f_true <- function(x) sin(2*pi*x)

sigma <- 0.8
n <- 200

x_grid <- seq(-2,2,length.out=120)
f_grid <- f_true(x_grid)

x <- runif(n,-2,2)
y <- f_true(x) + rnorm(n,0,sigma)

plot(x,y,pch=16,main="Training sample: Y = sin(2πx) + noise")
lines(x_grid,f_grid,lwd=2)

# model sederhana
m_lm <- lm(y ~ x)

# model fleksibel
deg <- 15
m_poly <- lm(y ~ poly(x, deg, raw = TRUE))
plot(x, y, pch=16, main="Fit pada satu dataset")
lines(x_grid, f_grid, lwd=2)

lines(x_grid,
      predict(m_lm, newdata=data.frame(x=x_grid)),
      lwd=2, lty=2)

lines(x_grid,
      predict(m_poly, newdata=data.frame(x=x_grid)),
      lwd=2, lty=3)

legend("topleft",
       legend=c("f(x) true",
                "Linear (OLS)",
                paste0("Polynomial deg=",deg)),
       lty=c(1,2,3),
       bty="n")

B <- 300

pred_lm <- matrix(NA_real_,B,length(x_grid))
pred_poly <- matrix(NA_real_,B,length(x_grid))
set.seed(2026)

for(b in 1:B){

  x_b <- runif(n,-2,2)
  y_b <- f_true(x_b) + rnorm(n,0,sigma)

  fit_lm <- lm(y_b ~ x_b)
  fit_poly <- lm(y_b ~ poly(x_b,deg,raw=TRUE))

  pred_lm[b,] <- predict(fit_lm,
                         newdata=data.frame(x_b=x_grid))

  pred_poly[b,] <- predict(fit_poly,
                           newdata=data.frame(x_b=x_grid))
}
bv_summary <- function(pred_mat,f_grid,sigma2){

  mu_hat <- colMeans(pred_mat)

  bias2 <- (mu_hat - f_grid)^2

  varhat <- apply(pred_mat,2,var)

  msehat <- bias2 + varhat + sigma2

  list(
    mu_hat = mu_hat,
    bias2 = bias2,
    variance = varhat,
    msehat = msehat
  )
}

s_lm <- bv_summary(pred_lm,f_grid,sigma^2)
s_poly <- bv_summary(pred_poly,f_grid,sigma^2)
par(mfrow=c(2,2),mar=c(4,4,2,1))

plot(x_grid,s_lm$bias2,type="l",lwd=2,
     main="Bias^2 vs x (Linear)",
     ylab="Bias^2",xlab="x")

plot(x_grid,s_lm$variance,type="l",lwd=2,
     main="Variance vs x (Linear)",
     ylab="Variance",xlab="x")

plot(x_grid,s_poly$bias2,type="l",lwd=2,
     main=paste0("Bias^2 vs x (Poly ",deg,")"),
     ylab="Bias^2",xlab="x")

plot(x_grid,s_poly$variance,type="l",lwd=2,
     main=paste0("Variance vs x (Poly ",deg,")"),
     ylab="Variance",xlab="x")

avg_metrics <- function(s) {
  c(
    Bias2 = mean(s$bias2),
    Variance = mean(s$variance),
    Irreducible = sigma^2,
    MSE = mean(s$msehat)
  )
}

rbind(
  Linear_OLS = avg_metrics(s_lm),
  Poly_high_degree = avg_metrics(s_poly)
)
##                        Bias2   Variance Irreducible       MSE
## Linear_OLS       0.477622715 0.01100145        0.64 1.1286242
## Poly_high_degree 0.001518939 0.14745757        0.64 0.7889765

Interpretasi

• variance model fleksibel menurun

• model menjadi lebih stabil

• overfitting berkurang

1.1.3 Soal 3

# fungsi ridge
ridge_fit_predict <- function(x_train, y_train, x_grid, deg=15, lambda=1){
  Xtr <- sapply(0:deg, function(k) x_train^k)
  Xg  <- sapply(0:deg, function(k) x_grid^k)

  fit <- glmnet(Xtr, y_train, alpha=0, lambda=lambda, standardize=TRUE)

  as.numeric(predict(fit, newx=Xg, s=lambda))
}

# simulasi ridge
lambda <- 3
pred_ridge <- matrix(NA_real_, nrow=B, ncol=length(x_grid))

set.seed(2026)
for(b in 1:B){

  x_b <- runif(n,-2,2)
  y_b <- f_true(x_b) + rnorm(n,0,sigma)

  pred_ridge[b,] <- ridge_fit_predict(
    x_b,
    y_b,
    x_grid,
    deg=deg,
    lambda=lambda
  )
}

# bias-variance ridge
s_ridge <- bv_summary(pred_ridge, f_grid, sigma^2)

# ringkasan model
rbind(
  Linear_OLS = avg_metrics(s_lm),
  Poly_high_degree = avg_metrics(s_poly),
  Ridge_poly_features = avg_metrics(s_ridge)
)
##                           Bias2   Variance Irreducible       MSE
## Linear_OLS          0.477622715 0.01100145        0.64 1.1286242
## Poly_high_degree    0.001518939 0.14745757        0.64 0.7889765
## Ridge_poly_features 0.455969500 0.01160909        0.64 1.1075786
# plot rata-rata prediksi
plot(x_grid, f_grid, type="l", lwd=2,
     main="Mean Prediction E[f^(x)] vs f(x)",
     ylab="y", xlab="x")

lines(x_grid, s_lm$mu_hat, lwd=2, lty=2)
lines(x_grid, s_poly$mu_hat, lwd=2, lty=3)
lines(x_grid, s_ridge$mu_hat, lwd=2, lty=4)

legend("topleft",
       legend=c("f(x) true",
                "Linear",
                paste0("Poly ",deg),
                paste0("Ridge (lambda=",lambda,")")),
       lty=c(1,2,3,4),
       bty="n")

Interpretasi

• Model polynomial derajat tinggi (tanpa regularization) memiliki variance sangat besar, sehingga prediksi sangat sensitif terhadap perubahan data training.

• Ridge regression menambahkan penalti pada koefisien model, sehingga koefisien menjadi lebih kecil dan model menjadi lebih stabil.

• Dengan λ sedang (misalnya λ = 3), variance model menurun cukup besar sementara bias hanya meningkat sedikit. Hal ini biasanya menghasilkan MSE yang lebih kecil dibanding polynomial biasa.

• Ketika λ terlalu kecil, ridge hampir sama dengan model polynomial biasa sehingga variance masih tinggi.

• Ketika λ terlalu besar, koefisien model ditekan terlalu kuat sehingga model kehilangan fleksibilitas dan mulai underfit.

• Hasil ini menunjukkan bahwa regularization membantu mengontrol tradeoff antara bias dan variance dan dapat meningkatkan kemampuan generalisasi model.

1.1.4 Soal 4

Deg 5

# degree 5
deg <- 5

# fit model
m_poly5 <- lm(y ~ poly(x, deg, raw=TRUE))

# plot fit pada satu dataset
plot(x, y, pch=16, main="Polynomial Fit (deg=5)")
lines(x_grid, f_grid, lwd=2)

lines(x_grid,
      predict(m_poly5, newdata=data.frame(x=x_grid)),
      lwd=2, lty=3)

legend("topleft",
       legend=c("f(x) true","Poly deg=5"),
       lty=c(1,3), bty="n")

# simulasi bias-variance
pred_poly5 <- matrix(NA_real_, B, length(x_grid))

set.seed(2026)
for(b in 1:B){

  x_b <- runif(n,-2,2)
  y_b <- f_true(x_b) + rnorm(n,0,sigma)

  fit_poly5 <- lm(y_b ~ poly(x_b,5,raw=TRUE))

  pred_poly5[b,] <- predict(fit_poly5,
                            newdata=data.frame(x_b=x_grid))
}

# hitung bias variance
s_poly5 <- bv_summary(pred_poly5, f_grid, sigma^2)

Deg 15

# degree 15
deg <- 15

# fit model
m_poly15 <- lm(y ~ poly(x, deg, raw=TRUE))

# plot fit pada satu dataset
plot(x, y, pch=16, main="Polynomial Fit (deg=15)")
lines(x_grid, f_grid, lwd=2)

lines(x_grid,
      predict(m_poly15, newdata=data.frame(x=x_grid)),
      lwd=2, lty=3)

legend("topleft",
       legend=c("f(x) true","Poly deg=15"),
       lty=c(1,3), bty="n")

# simulasi bias-variance
pred_poly15 <- matrix(NA_real_, B, length(x_grid))

set.seed(2026)
for(b in 1:B){

  x_b <- runif(n,-2,2)
  y_b <- f_true(x_b) + rnorm(n,0,sigma)

  fit_poly15 <- lm(y_b ~ poly(x_b,15,raw=TRUE))

  pred_poly15[b,] <- predict(fit_poly15,
                             newdata=data.frame(x_b=x_grid))
}

# hitung bias variance
s_poly15 <- bv_summary(pred_poly15, f_grid, sigma^2)

Ringkasan perbandingan

rbind(
  Poly_deg5 = avg_metrics(s_poly5),
  Poly_deg15 = avg_metrics(s_poly15)
)
##                  Bias2   Variance Irreducible       MSE
## Poly_deg5  0.442995918 0.03558981        0.64 1.1185857
## Poly_deg15 0.001518939 0.14745757        0.64 0.7889765

Interpretasi

Polynomial deg = 5

•   model lebih sederhana
•   bias lebih besar
•   variance lebih kecil

Polynomial deg = 15

•   model sangat fleksibel
•   bias kecil
•   variance sangat besar

Kesimpulan:

meningkatkan kompleksitas model mengurangi bias tetapi meningkatkan variance (bias–variance tradeoff).