# =============================================================================
# ETS KOMPUTASI STATISTIKA - SEMESTER GENAP 2025/2026
# Soal 2 - Dataset: airquality
# -----------------------------------------------------------------------------
# Petunjuk umum:
#  1. Kerjakan menggunakan bahasa pemrograman R.
#  2. Setiap nomor dikerjakan dalam file R script TERPISAH.
#  3. Format penamaan file: Nomor_NRP_Nama.R  (mis. 2_5003221001_Budi.R)
#  4. Ikuti ketentuan penggunaan fungsi built-in pada masing-masing sub-soal.
# -----------------------------------------------------------------------------
# Nama   :  GISELA SEKAR KINANTHI
# NRP    :  5003251013
# Kelas  :  D
# =============================================================================

# -----------------------------------------------------------------------------
# Persiapan data
# -----------------------------------------------------------------------------
data <- na.omit(airquality[, c("Ozone", "Solar.R", "Wind", "Temp")])
# dim(data)  # 111 x 4


# =============================================================================
# (a) mat_mult() - perkalian matriks manual TANPA operator %*%
# =============================================================================
# A (m x n), B (n x p) -> C (m x p) dengan c_ij = sum_{k=1}^n a_ik * b_kj

mat_mult <- function(A, B) {
  n <- nrow(A)
  m <- ncol(B)
  p <- ncol(A)
  
  hasil <- matrix(0, nrow=n, ncol=m)
  
  for(i in 1:n){
    for(j in 1:m){
      jumlah <- 0
      for(k in 1:p){
        jumlah <- jumlah + A[i,k] * B[k,j]
      }
      hasil[i,j] <- jumlah
    }
  }
  return(hasil)
  
}

# Validasi fungsi

a <- matrix(c(1,2,3,4), nrow=2)
b <- matrix(c(5,6,7,8), nrow=2)

c_std    <- a %*% b
c_manual <- mat_mult(a,b)

{
  cat("Hasil standar (a %*% b):\n")
  print(c_std)
  cat("\nHasil mat_mult (manual):\n")
  print(c_manual)
  cat("Selisih: \n")
  print(c_std == c_manual)
}
## Hasil standar (a %*% b):
##      [,1] [,2]
## [1,]   23   31
## [2,]   34   46
## 
## Hasil mat_mult (manual):
##      [,1] [,2]
## [1,]   23   31
## [2,]   34   46
## Selisih: 
##      [,1] [,2]
## [1,] TRUE TRUE
## [2,] TRUE TRUE
# =============================================================================
# (b) corr_matrix(X) - matriks korelasi Pearson TANPA fungsi cor()
# =============================================================================
# Langkah (gunakan mat_mult untuk SEMUA perkalian matriks):
#  (i)   X_bar = (1/n) * X^T %*% 1_n
#  (ii)  X_c = X - 1_n %*% X_bar^T
#        S   = (1/(n-1)) * X_c^T %*% X_c
#  (iii) D^{-1/2} = diag(1 / sqrt(diag(S)))
#        R   = D^{-1/2} %*% S %*% D^{-1/2}

corr_matrix <- function(X) {
  X <- as.matrix(X)
  n <- nrow(X)
  p <- ncol(X)
  satu <- matrix(1, nrow = n, ncol = 1)
  X_bar <- (1/n) * mat_mult(t(X),satu)
  X_c <- X - (mat_mult(satu,t(X_bar)))
  S <- (1/(n-1)) * mat_mult(t(X_c),X_c)
  D <- diag(1 / sqrt(diag(S)))
  sementara1 <- mat_mult(D, S)
  R <- mat_mult(sementara1, D)
  
}

# TODO: terapkan ke data kualitas udara dan tampilkan
R_hat <- corr_matrix(data)
print(round(R_hat, 6))
##           [,1]      [,2]      [,3]      [,4]
## [1,]  1.000000  0.348342 -0.612497  0.698541
## [2,]  0.348342  1.000000 -0.127183  0.294088
## [3,] -0.612497 -0.127183  1.000000 -0.497190
## [4,]  0.698541  0.294088 -0.497190  1.000000
# Validasi jawaban
round(R_hat,10) == round(cor(data),10)
##         Ozone Solar.R Wind Temp
## Ozone    TRUE    TRUE TRUE TRUE
## Solar.R  TRUE    TRUE TRUE TRUE
## Wind     TRUE    TRUE TRUE TRUE
## Temp     TRUE    TRUE TRUE TRUE
# =============================================================================
# (c) var_test_one(x, sigma0_sq, alpha)
# =============================================================================
# H0: sigma^2 = 1000  vs  H1: sigma^2 > 1000,  alpha = 0.05
# TANPA fungsi built-in: mean(), sum(), var(), sd(), var.test()
# (Boleh: dchisq, qchisq, length, dst.)
#
# Langkah:
#  (i)  Statistik: chi^2 = (n-1) * s^2 / sigma0^2,
#       dengan s^2 = sum((x_i - xbar)^2) / (n - 1)
#  (ii) p-value via trapezoidal pada distribusi chi-square (sisi kanan):
#       p-value = 1 - integral_0^{chi2_hit} f_{chi^2_{n-1}}(x) dx
#       Aproksimasi:
#         integral ~ (h/2) * [f(0) + 2*sum_{i=1}^{N-1} f(i*h) + f(chi2_hit)]
#       dengan h = chi2_hit / N, N = 10000, df = n - 1.
#  (iii) Keputusan + interpretasi dalam konteks variasi kadar ozon harian.

var_test_one <- function(x, sigma0_sq, alpha) {
  # TODO (i): hitung n, xbar, dan s^2
  n <- length(x)
  sum <- 0
  for(i in 1:n){
    sum <- sum + x[i]
  }
  xbar <- sum/n
  sum2 <- 0
  for(j in 1:n){
    sum2 <- sum2 + (x[i]-xbar)^2
  }
  s2 <- sum2/(n-1)
  
  # TODO (i): hitung chi2_hit dan df
  chi2_hit <- ((n-1)*s2) / sigma0_sq
  df <- n-1
  
  # TODO (ii): aproksimasi p-value via trapezoidal (N = 10000)
  trapezoidal <- function(chi2_hit, df,N) {
    h <- chi2_hit/N
    fa <- dchisq(0, df=df)
    fb <- dchisq(chi2_hit, df=df)
    sum <- 0
    
    for (i in 1:(N-1)) {
      x <- i*h
      sum <- sum+dchisq(x, df=df)
    }
    hasil <- (h/2)*(fa+2*sum+fb)
    
    return(hasil)
  }
  p_value <- trapezoidal(chi2_hit, df,10000)
  
  # TODO (iii): hitung chi2_kritis = qchisq(1 - alpha, df) dan keputusan
  chi2_kritis <- qchisq(1-alpha,df)
  if(p_value < alpha){
    decision <- "tolak h0. tidak ada cukup bukti bahwa ozon 1000"
  } else {
    decision <- "gagal tolak h0. cukup bukti bahwa ozon 1000"
  }
  
  # Kembalikan list: statistik uji, p-value, keputusan
  results <- list(statistic = chi2_hit, p_value = p_value, decision = decision)
}

# TODO: terapkan ke Ozone
x <- data$Ozone
test_results <- var_test_one(x, sigma0_sq = 1000, alpha = 0.05)
print(test_results)
## $statistic
## [1] 54.20909
## 
## $p_value
## [1] 1.673108e-06
## 
## $decision
## [1] "tolak h0. tidak ada cukup bukti bahwa ozon 1000"