# =========================================================================
# ETS KOMPUTASI STATISTIKA - SEMESTER GENAP 2025/2026
# Soal 2 - Dataset: airquality
# -------------------------------------------------------------------------
# Petunjuk umum:
#  1. Kerjakan menggunakan bahasa pemrograman R.
#  2. Setiap nomor dikerjakan dalam file R script TERPISAH.
#  3. Format penamaan file: Nomor_NRP_Nama.R
# -------------------------------------------------------------------------
# Nama   :  shella yuastari____________________________________
# NRP    :  5003251193____________________________________
# Kelas  :  statistika D____________________________________
# ========================================================================

# -----------------------------------------------------------------------------
# Persiapan data
# ------------------------------------------------------------------------
data <- na.omit(airquality[, c("Ozone", "Solar.R", "Wind", "Temp")])

# dim(data)
# 111 x 4


# ========================================================================
# (a) mat_mult() - perkalian matriks manual TANPA operator %*%
# ========================================================================
# A (m x n), B (n x p) -> C (m x p)

mat_mult <- function(A, B) {

  # Mengecek apakah matriks bisa dikalikan
  if(ncol(A) != nrow(B)){
    stop("Jumlah kolom A harus sama dengan jumlah baris B")
  }

  # Ukuran matriks
  m <- nrow(A)
  n <- ncol(A)
  p <- ncol(B)

  # Matriks hasil
  C <- matrix(0, nrow = m, ncol = p)

  # Perkalian matriks manual
  for(i in 1:m){

    for(j in 1:p){

      jumlah <- 0

      for(k in 1:n){

        jumlah <- jumlah + A[i,k] * B[k,j]

      }

      C[i,j] <- jumlah

    }

  }

  return(C)

}

# -------------------------------------------------------------------------
# Validasi fungsi
# -------------------------------------------------------------------------

a <- matrix(c(1,2,3,4), nrow=2)
b <- matrix(c(5,6,7,8), nrow=2)

c_std    <- a %*% b
c_manual <- mat_mult(a,b)

{
  cat("Hasil standar (a %*% b):\n")
  print(c_std)

  cat("\nHasil mat_mult (manual):\n")
  print(c_manual)

  cat("\nSelisih:\n")
  print(c_std == c_manual)
}
Hasil standar (a %*% b):
     [,1] [,2]
[1,]   23   31
[2,]   34   46

Hasil mat_mult (manual):
     [,1] [,2]
[1,]   23   31
[2,]   34   46

Selisih:
     [,1] [,2]
[1,] TRUE TRUE
[2,] TRUE TRUE
# ========================================================================
# (b) corr_matrix(X) - matriks korelasi Pearson TANPA fungsi cor()
# ========================================================================
# Gunakan mat_mult untuk SEMUA perkalian matriks

corr_matrix <- function(X) {

  # Mengubah menjadi matriks
  X <- as.matrix(X)

  # Ukuran data
  n <- nrow(X)
  p <- ncol(X)

  # -----------------------------------------------------------------------
  # (i) Hitung rata-rata kolom
  # X_bar = (1/n) * X^T %*% 1_n
  # -----------------------------------------------------------------------

  one_n <- matrix(1, nrow = n, ncol = 1)

  X_bar <- (1/n) * mat_mult(t(X), one_n)

  # ---------------------------------------------------------------------------
  # (ii) Hitung matriks kovarians
  # X_c = X - 1_n %*% X_bar^T
  # S   = (1/(n-1)) * X_c^T %*% X_c
  # -----------------------------------------------------------------------

  mean_matrix <- mat_mult(one_n, t(X_bar))

  X_c <- X - mean_matrix

  S <- (1/(n-1)) * mat_mult(t(X_c), X_c)

  # -----------------------------------------------------------------------
  # (iii) Hitung matriks korelasi
  # R = D^{-1/2} S D^{-1/2}
  # -----------------------------------------------------------------------

  D <- diag(diag(S))

  D_inv_half <- diag(1 / sqrt(diag(D)))

  temp <- mat_mult(D_inv_half, S)

  R <- mat_mult(temp, D_inv_half)

  return(R)

}

# -------------------------------------------------------------------------
# Terapkan ke data airquality
# -------------------------------------------------------------------------

R_hat <- corr_matrix(data)

cat("\nMatriks Korelasi Pearson:\n")

Matriks Korelasi Pearson:
print(round(R_hat, 6))
          [,1]      [,2]      [,3]      [,4]
[1,]  1.000000  0.348342 -0.612497  0.698541
[2,]  0.348342  1.000000 -0.127183  0.294088
[3,] -0.612497 -0.127183  1.000000 -0.497190
[4,]  0.698541  0.294088 -0.497190  1.000000
# -------------------------------------------------------------------------
# Validasi jawaban
# -------------------------------------------------------------------------

cat("\nValidasi dengan fungsi cor():\n")

Validasi dengan fungsi cor():
print(round(R_hat,10) == round(cor(data),10))
        Ozone Solar.R Wind Temp
Ozone    TRUE    TRUE TRUE TRUE
Solar.R  TRUE    TRUE TRUE TRUE
Wind     TRUE    TRUE TRUE TRUE
Temp     TRUE    TRUE TRUE TRUE
# ========================================================================
# (c) var_test_one(x, sigma0_sq, alpha)
# ========================================================================
# H0: sigma^2 = 1000  vs  H1: sigma^2 > 1000,  alpha = 0.05
# TANPA fungsi built-in: mean(), sum(), var(), sd(), var.test()
# (Boleh: dchisq, qchisq, length, dst.)
#
# Langkah:
#  (i)  Statistik: chi^2 = (n-1) * s^2 / sigma0^2,
#        dengan s^2 = sum((x_i - xbar)^2) / (n - 1)
#  (ii) p-value via trapezoidal pada distribusi chi-square (sisi kanan):
#        p-value = 1 - integral_0^{chi2_hit} f_{chi^2_{n-1}}(x) dx
#        Aproksimasi:
#          integral ~ (h/2) * [f(0) + 2*sum_{i=1}^{N-1} f(i*h) + f(chi2_hit)]
#        dengan h = chi2_hit / N, N = 10000, df = n - 1.
#  (iii) Keputusan + interpretasi dalam konteks variasi kadar ozon harian.

var_test_one <- function(x, sigma0_sq, alpha) {
  # --- TODO (i): hitung n, xbar, dan s^2 ---
  n <- length(x)
  
  # Hitung sum(x) secara manual untuk mencari xbar
  total_x <- 0
  for (i in 1:n) {
    total_x <- total_x + x[i]
  }
  xbar <- total_x / n
  
  # Hitung sum((x_i - xbar)^2) secara manual untuk mencari s^2
  total_ss <- 0
  for (i in 1:n) {
    total_ss <- total_ss + (x[i] - xbar)^2
  }
  s_sq <- total_ss / (n - 1)
  
  # --- TODO (i): hitung chi2_hit dan df ---
  df <- n - 1
  chi2_hit <- (n - 1) * s_sq / sigma0_sq
  
  # --- TODO (ii): aproksimasi p-value via trapezoidal (N = 10000) ---
  N <- 10000
  h <- chi2_hit / N
  
  # f(0) dan f(chi2_hit) menggunakan dchisq
  f_0 <- dchisq(0, df)
  f_end <- dchisq(chi2_hit, df)
  
  # Menghitung sum_{i=1}^{N-1} f(i*h) secara manual
  sum_interior <- 0
  for (i in 1:(N - 1)) {
    x_i <- i * h
    sum_interior <- sum_interior + dchisq(x_i, df)
  }
  
  # Menghitung nilai integral dari 0 sampai chi2_hit
  integral <- (h / 2) * (f_0 + 2 * sum_interior + f_end)
  
  # p-value untuk uji pihak kanan (H1: sigma^2 > 1000)
  p_value <- 1 - integral
  
  # --- TODO (iii): hitung chi2_kritis = qchisq(1 - alpha, df) dan keputusan ---
  chi2_kritis <- qchisq(1 - alpha, df)
  
  if (chi2_hit > chi2_kritis) {
    decision <- "Tolak H0"
  } else {
    decision <- "Gagal Tolak H0"
  }
  
  # Kembalikan list: statistik uji, p-value, keputusan
  results <- list(statistic = chi2_hit, p_value = p_value, decision = decision)
  return(results)
}

# Terapkan ke Ozone
x <- data$Ozone
test_results <- var_test_one(x, sigma0_sq = 1000, alpha = 0.05)
print(test_results)
$statistic
[1] 121.8019

$p_value
[1] 0.207976

$decision
[1] "Gagal Tolak H0"
cat("\n=================== INTERPRETASI ===================\n")

=================== INTERPRETASI ===================
cat("Berdasarkan hasil uji hipotesis dengan alpha = 5%:\n")
Berdasarkan hasil uji hipotesis dengan alpha = 5%:
cat("Keputusan      :", test_results$decision, "\n")
Keputusan      : Gagal Tolak H0 
cat("Nilai P-Value  :", round(test_results$p_value, 5), "\n")
Nilai P-Value  : 0.20798 
cat("Kesimpulan     : Karena P-Value < alpha (atau Chi-Square Hitung > Chi-Square Kritis),\n")
Kesimpulan     : Karena P-Value < alpha (atau Chi-Square Hitung > Chi-Square Kritis),
cat("                 maka terdapat cukup bukti untuk menyatakan bahwa variasi (varians)\n")
                 maka terdapat cukup bukti untuk menyatakan bahwa variasi (varians)
cat("                 kadar ozon harian di udara perkotaan tersebut sudah secara signifikan\n")
                 kadar ozon harian di udara perkotaan tersebut sudah secara signifikan
cat("                 lebih besar dari 1000.\n")
                 lebih besar dari 1000.
---
title: "R Notebook"
output: html_notebook
---



```{r}
# =========================================================================
# ETS KOMPUTASI STATISTIKA - SEMESTER GENAP 2025/2026
# Soal 2 - Dataset: airquality
# -------------------------------------------------------------------------
# Petunjuk umum:
#  1. Kerjakan menggunakan bahasa pemrograman R.
#  2. Setiap nomor dikerjakan dalam file R script TERPISAH.
#  3. Format penamaan file: Nomor_NRP_Nama.R
# -------------------------------------------------------------------------
# Nama   :  shella yuastari____________________________________
# NRP    :  5003251193____________________________________
# Kelas  :  statistika D____________________________________
# ========================================================================

# -----------------------------------------------------------------------------
# Persiapan data
# ------------------------------------------------------------------------
data <- na.omit(airquality[, c("Ozone", "Solar.R", "Wind", "Temp")])

# dim(data)
# 111 x 4


# ========================================================================
# (a) mat_mult() - perkalian matriks manual TANPA operator %*%
# ========================================================================
# A (m x n), B (n x p) -> C (m x p)

mat_mult <- function(A, B) {

  # Mengecek apakah matriks bisa dikalikan
  if(ncol(A) != nrow(B)){
    stop("Jumlah kolom A harus sama dengan jumlah baris B")
  }

  # Ukuran matriks
  m <- nrow(A)
  n <- ncol(A)
  p <- ncol(B)

  # Matriks hasil
  C <- matrix(0, nrow = m, ncol = p)

  # Perkalian matriks manual
  for(i in 1:m){

    for(j in 1:p){

      jumlah <- 0

      for(k in 1:n){

        jumlah <- jumlah + A[i,k] * B[k,j]

      }

      C[i,j] <- jumlah

    }

  }

  return(C)

}

# -------------------------------------------------------------------------
# Validasi fungsi
# -------------------------------------------------------------------------

a <- matrix(c(1,2,3,4), nrow=2)
b <- matrix(c(5,6,7,8), nrow=2)

c_std    <- a %*% b
c_manual <- mat_mult(a,b)

{
  cat("Hasil standar (a %*% b):\n")
  print(c_std)

  cat("\nHasil mat_mult (manual):\n")
  print(c_manual)

  cat("\nSelisih:\n")
  print(c_std == c_manual)
}
```

```{r}
# ========================================================================
# (b) corr_matrix(X) - matriks korelasi Pearson TANPA fungsi cor()
# ========================================================================
# Gunakan mat_mult untuk SEMUA perkalian matriks

corr_matrix <- function(X) {

  # Mengubah menjadi matriks
  X <- as.matrix(X)

  # Ukuran data
  n <- nrow(X)
  p <- ncol(X)

  # -----------------------------------------------------------------------
  # (i) Hitung rata-rata kolom
  # X_bar = (1/n) * X^T %*% 1_n
  # -----------------------------------------------------------------------

  one_n <- matrix(1, nrow = n, ncol = 1)

  X_bar <- (1/n) * mat_mult(t(X), one_n)

  # ---------------------------------------------------------------------------
  # (ii) Hitung matriks kovarians
  # X_c = X - 1_n %*% X_bar^T
  # S   = (1/(n-1)) * X_c^T %*% X_c
  # -----------------------------------------------------------------------

  mean_matrix <- mat_mult(one_n, t(X_bar))

  X_c <- X - mean_matrix

  S <- (1/(n-1)) * mat_mult(t(X_c), X_c)

  # -----------------------------------------------------------------------
  # (iii) Hitung matriks korelasi
  # R = D^{-1/2} S D^{-1/2}
  # -----------------------------------------------------------------------

  D <- diag(diag(S))

  D_inv_half <- diag(1 / sqrt(diag(D)))

  temp <- mat_mult(D_inv_half, S)

  R <- mat_mult(temp, D_inv_half)

  return(R)

}

# -------------------------------------------------------------------------
# Terapkan ke data airquality
# -------------------------------------------------------------------------

R_hat <- corr_matrix(data)

cat("\nMatriks Korelasi Pearson:\n")
print(round(R_hat, 6))

# -------------------------------------------------------------------------
# Validasi jawaban
# -------------------------------------------------------------------------

cat("\nValidasi dengan fungsi cor():\n")
print(round(R_hat,10) == round(cor(data),10))
```
```{r}
# ========================================================================
# (c) var_test_one(x, sigma0_sq, alpha)
# ========================================================================
# H0: sigma^2 = 1000  vs  H1: sigma^2 > 1000,  alpha = 0.05
# TANPA fungsi built-in: mean(), sum(), var(), sd(), var.test()
# (Boleh: dchisq, qchisq, length, dst.)
#
# Langkah:
#  (i)  Statistik: chi^2 = (n-1) * s^2 / sigma0^2,
#        dengan s^2 = sum((x_i - xbar)^2) / (n - 1)
#  (ii) p-value via trapezoidal pada distribusi chi-square (sisi kanan):
#        p-value = 1 - integral_0^{chi2_hit} f_{chi^2_{n-1}}(x) dx
#        Aproksimasi:
#          integral ~ (h/2) * [f(0) + 2*sum_{i=1}^{N-1} f(i*h) + f(chi2_hit)]
#        dengan h = chi2_hit / N, N = 10000, df = n - 1.
#  (iii) Keputusan + interpretasi dalam konteks variasi kadar ozon harian.

var_test_one <- function(x, sigma0_sq, alpha) {
  # --- TODO (i): hitung n, xbar, dan s^2 ---
  n <- length(x)
  
  # Hitung sum(x) secara manual untuk mencari xbar
  total_x <- 0
  for (i in 1:n) {
    total_x <- total_x + x[i]
  }
  xbar <- total_x / n
  
  # Hitung sum((x_i - xbar)^2) secara manual untuk mencari s^2
  total_ss <- 0
  for (i in 1:n) {
    total_ss <- total_ss + (x[i] - xbar)^2
  }
  s_sq <- total_ss / (n - 1)
  
  # --- TODO (i): hitung chi2_hit dan df ---
  df <- n - 1
  chi2_hit <- (n - 1) * s_sq / sigma0_sq
  
  # --- TODO (ii): aproksimasi p-value via trapezoidal (N = 10000) ---
  N <- 10000
  h <- chi2_hit / N
  
  # f(0) dan f(chi2_hit) menggunakan dchisq
  f_0 <- dchisq(0, df)
  f_end <- dchisq(chi2_hit, df)
  
  # Menghitung sum_{i=1}^{N-1} f(i*h) secara manual
  sum_interior <- 0
  for (i in 1:(N - 1)) {
    x_i <- i * h
    sum_interior <- sum_interior + dchisq(x_i, df)
  }
  
  # Menghitung nilai integral dari 0 sampai chi2_hit
  integral <- (h / 2) * (f_0 + 2 * sum_interior + f_end)
  
  # p-value untuk uji pihak kanan (H1: sigma^2 > 1000)
  p_value <- 1 - integral
  
  # --- TODO (iii): hitung chi2_kritis = qchisq(1 - alpha, df) dan keputusan ---
  chi2_kritis <- qchisq(1 - alpha, df)
  
  if (chi2_hit > chi2_kritis) {
    decision <- "Tolak H0"
  } else {
    decision <- "Gagal Tolak H0"
  }
  
  # Kembalikan list: statistik uji, p-value, keputusan
  results <- list(statistic = chi2_hit, p_value = p_value, decision = decision)
  return(results)
}

# Terapkan ke Ozone
x <- data$Ozone
test_results <- var_test_one(x, sigma0_sq = 1000, alpha = 0.05)
print(test_results)


cat("\n=================== INTERPRETASI ===================\n")
cat("Berdasarkan hasil uji hipotesis dengan alpha = 5%:\n")
cat("Keputusan      :", test_results$decision, "\n")
cat("Nilai P-Value  :", round(test_results$p_value, 5), "\n")
cat("Kesimpulan     : Karena P-Value < alpha (atau Chi-Square Hitung > Chi-Square Kritis),\n")
cat("                 maka terdapat cukup bukti untuk menyatakan bahwa variasi (varians)\n")
cat("                 kadar ozon harian di udara perkotaan tersebut sudah secara signifikan\n")
cat("                 lebih besar dari 1000.\n")
```





