NOTES!!!!!

How to do Hypothesis testing

  1. Read the question carefully. what is known?
  2. Data
    1. one sample, n > 30, sd known = Z test
    2. one sample, n < 30, sd unknown = t test
    3. two sample, n > 30, sd known = Z test
    4. two sample, n < 30, sd unknwon = t test
  3. Compute the Z or t score
  4. Write PDF for z and t distribution
  5. use integration method (trapezoidal/simpson) -> if using t, inside the function add v(df) in function (f,a,b,n=1000)

Things to remember

A. P (P-value)

  • two tail: 2*1- (P(abs(Ztest)))
  • upper tail: 1 - P(Ztest)
  • lower tail: P(Ztest)

B. q (Inverse of P)

  • two tail: q(1-alpha/2)
  • upper tail: q(1-alpha)
  • lower tail: q(alpha)

C. P-value integral

  • two tail: a = |Zscore|, b = 100
  • upper tail: a = Zscore, b = 100
  • lower tail: a = -100, b = Zscore

One Sample Z test

An electrical firm manufactures light bulbs that have a lifetime that is approximately normally distributet with a mean of 800 hours and a standard deviation of 40 hours. Test the hypothesis that µ = 800 hours against the alternative, µ != 800 hours, if a random sample of 30 bulbs has an average life of 788 hours. Use a P-value in your answer.

n = 30
miu = 800
xbar = 788
sd = 40

#HO : µ = 800
#H1 : µ != 800


# ------ Sample Z-Test Function ---
OneSample_Z <- function(xbar, miu, sigma, n, alpha = 0.05, type = "two.tail") {
  
  zscore <- (xbar - miu) / (sigma / sqrt(n))
  
  PDF_Normal <- function(x) {
    1/sqrt(2*pi) * exp(-x^2/2)
  }
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a) + f(b)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_Normal, abs(zscore), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_Normal, -100, zscore)
  } else if (type == "upper") {
    p_trap <- trap(PDF_Normal, zscore, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pnorm(abs(zscore)))
    crit_val <- qnorm(1 - alpha / 2)
  } else if (type == "lower") {
    p_builtin <- pnorm(zscore)
    crit_val <- qnorm(alpha)
  } else if (type == "upper") {
    p_builtin <- 1 - pnorm(zscore)
    crit_val <- qnorm(1 - alpha)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄)    =" ,round(xbar, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("Z-statistic        =", round(zscore, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical Z value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

OneSample_Z(xbar, miu, sd, n)
=== One Sample Z-Test ===
Sample mean (x̄)    = 788 
Population mean μ  = 800 
Z-statistic        = -1.6432 
p-value (Trap)     = 0.10062 
p-value (pnorm)    = 0.10035 
alpha              = 0.05 
Critical Z value   = 1.96 

>>> Fail to Reject H0 (Not Significant)

One Sample t test

Test the hypothesis that the average content of containers of a particular lubricant is 10 liters if the contents of a random sample of 10 containers are 10.2, 9.7, 10.1, 10.3, 10.1, 9.8, 9.9, 10.4, 10.3, and 9.8 liters. Use a 0.01 level of significance and assume that the distribution of contents is normal.


data <- c(10.2, 9.7, 10.1, 10.3, 10.1, 9.8, 9.9, 10.4, 10.3, 9.8)
miu <- 10



OneSample_t <- function(x, miu, alpha = 0.01, type = "two.tail") {
  
  # --- define the xbar, s, etc ---
  n = length(x)
  x_bar = sum(x)/n
  s = sd(x)
  v = n-1
  
  # --- define the tscore and pdf ---
  
  t_score = (x_bar - miu) / (s/ sqrt(n))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄)    =" ,round(x_bar, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

OneSample_t(data, miu)
=== One Sample Z-Test ===
Sample mean (x̄)    = 10.06 
Population mean μ  = 10 
t-statistic        = 0.7717 
p-value (Trap)     = 0.46042 
p-value (pnorm)    = 0.46005 
alpha              = 0.01 
Critical t value   = 3.2498 

>>> Fail to Reject H0 (Not Significant)

Two Sample Z-test

A random sample of size n1 = 25, taken from a normal population with a standard deviation σ1 = 5.2, has a mean xbar1 = 81. A second random sample of size n2 = 36, taken from a different normal population with a standard deviation σ2 = 3.4, has a mean xbar2 = 76. Test the hypothesis that μ1 = μ2 against the alternative, μ1 != μ2. Quote a P-value in your conclusion

n1 = 25
sd1 = 5.2
xbar1 = 81

n2 = 36
sd2 = 3.4
xbar2 = 76

# H0 : μ1 = μ2
# H1 : μ1 != μ2

TwoSample_Z <- function(xbar1, xbar2, sigma1, sigma2, n1, n2, alpha = 0.05, d0 = 0, type = "two.tail") {
  
  zscore <- (xbar1 - xbar2 - d0) / sqrt((sigma1^2/n1) + (sigma2^2/n2))
  
  PDF_Normal <- function(x) {
    1/sqrt(2*pi) * exp(-x^2/2)
  }
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a) + f(b)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_Normal, abs(zscore), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_Normal, -100, zscore)
  } else if (type == "upper") {
    p_trap <- trap(PDF_Normal, zscore, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pnorm(abs(zscore)))
    crit_val <- qnorm(1 - alpha / 2)
  } else if (type == "lower") {
    p_builtin <- pnorm(zscore)
    crit_val <- qnorm(alpha)
  } else if (type == "upper") {
    p_builtin <- 1 - pnorm(zscore)
    crit_val <- qnorm(1 - alpha)
  }
  
  
  # --- Output Results ---
  cat("=== Two Sample Z-Test ===\n")
  cat("Sample mean (x̄1)   =" ,round(xbar1, 4), "\n")
  cat("Sample mean (x̄2)   =" ,round(xbar2, 4), "\n")
  cat("Z-statistic        =", round(zscore, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical Z value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_Z(xbar1, xbar2, sd1, sd2, n1, n2)
=== Two Sample Z-Test ===
Sample mean (x̄1)   = 81 
Sample mean (x̄2)   = 76 
Z-statistic        = 4.2217 
p-value (Trap)     = 2e-05 
p-value (pnorm)    = 2e-05 
alpha              = 0.05 
Critical Z value   = 1.96 

>>> Reject H0 (Significant)

Two sample t-test, variance is equal

To find out whether a new serum will arrest leukemia, 9 mice, all with an advanced stage of the disease, are selected. Five mice receive the treatment and 4 do not. Survival times, in years, from the time the experiment commenced are as follows:

Treatment = 2.1, 5.3, 1.4, 4.6, 0.9 No Treatment = 1.9, 0.5, 2.8, 3.1

At the 0.05 level of significance, can the serum be said to be effective? Assume the two population to be normally distributed with equal variances.

data1 <- c(2.1, 5.3, 1.4, 4.6, 0.9)
data2 <- c(1.9, 0.5, 2.8, 3.1)

# H0 : The serum is effective (μ1 = μ2)
# H1 : The serum is not effective

TwoSample_t_same <- function(x1, x2, alpha = 0.05, type = "two.tail", d0 = 0) {
  
  # --- define the xbar, s, etc ---
  n1 = length(x1)
  n2 = length (x2)
  x_bar1 = sum(x1)/n1
  x_bar2 = sum(x2)/n2
  s1 = sd(x1)
  s2 = sd(x2)
  v = (n1 + n2) - 2
  sp_2 = (((n1 - 1)*s1^2) + ((n2 - 1)*s2^2)) / v
  sp = sqrt(sp_2)
  
  # --- define the tscore and pdf ---
  
  t_score = (x_bar1 - x_bar2 - d0) / (sp*sqrt((1/n1)+(1/n2)))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄1)    =" ,round(x_bar1, 4), "\n")
  cat("Sample mean (x2)    =" ,round(x_bar2, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_t_same(data1, data2)
=== One Sample Z-Test ===
Sample mean (x̄1)    = 2.86 
Sample mean (x2)    = 2.075 
Population mean μ  = 10 
t-statistic        = 0.699 
p-value (Trap)     = 0.50747 
p-value (pnorm)    = 0.50711 
alpha              = 0.05 
Critical t value   = 2.3646 

>>> Fail to Reject H0 (Not Significant)

Two sample t-test, variance is not equal

A study was conducted by the Department of Zoology at Virginia Tech to determine if there is a significant difference in the density of organisms at two different stations located on Cedar Run, a secondary stream in the Roanoke River drainage basin. Sewage from a sewage treatment plant and overflow from the Federal Mogul Corporation settling pond enter the stream near its headwaters. The following data give the density measurements, in number of organisms per square meter, at the two collecting stations

Station 1 = 5030, 13700, 10730, 11400, 860, 2200, 4250, 15040, 4980, 11910, 8130, 26850, 17660, 22800, 1130, 1690 Station 2 = 2800, 4670, 6890, 7720, 7030, 7330, 2810, 1330, 3320, 1230, 2130, 2190

Can we conclude, at the 0.05 level of significance, that the average densities at the two stations are equal? Assume that the observations come from normal populations with different variances.

station1 <- c(5030, 13700, 10730, 11400, 860, 2200, 4250, 15040, 4980, 11910, 8130, 26850, 17660, 22800, 1130, 1690)
station2 <- c(2800, 4670, 6890, 7720, 7030, 7330, 2810, 1330, 3320, 1230, 2130, 2190)

TwoSample_t_diff <- function(x1, x2, alpha = 0.05, type = "two.tail", d0 = 0) {
  
  # --- define the xbar, s, etc ---
  n1 = length(x1)
  n2 = length (x2)
  x_bar1 = sum(x1)/n1
  x_bar2 = sum(x2)/n2
  s1 = sd(x1)
  s2 = sd(x2)
  a = ((s1^2/n1) + (s2^2/n2))^2
  b = (((s1^2/n1)^2)/(n1-1)) + (((s2^2/n2)^2)/(n2-1))
  v = a/b
  
  
  # --- define the tscore and pdf ---
  
  t_score = ((x_bar1 - x_bar2) - d0) / (sqrt((s1^2/n1) + (s2^2/n2)))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄1)    =" ,round(x_bar1, 4), "\n")
  cat("Sample mean (x2)    =" ,round(x_bar2, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_t_diff(station1, station2)
=== One Sample Z-Test ===
Sample mean (x̄1)    = 9897.5 
Sample mean (x2)    = 4120.833 
Population mean μ  = 10 
t-statistic        = 2.7578 
p-value (Trap)     = 0.01266 
p-value (pnorm)    = 0.01261 
alpha              = 0.05 
Critical t value   = 2.0947 

>>> Reject H0 (Significant)
---
title: "Latihan ETS (W6)"
output: html_notebook
---

# _**NOTES!!!!!**_
## How to do Hypothesis testing
1. Read the question carefully. what is known?
2. Data 
    a) one sample, n > 30, sd known = Z test
    b) one sample, n < 30, sd unknown = t test
    c) two sample, n > 30, sd known = Z test
    d) two sample, n < 30, sd unknwon = t test
3. Compute the Z or t score
4. Write PDF for z and t distribution
5. use integration method (trapezoidal/simpson)
   -> if using t, inside the function add v(df) in function (f,a,b,n=1000)
  
## Things to remember
A. P (P-value)

   - two tail: 2*1- (P(abs(Ztest)))
   - upper tail: 1 - P(Ztest)
   - lower tail: P(Ztest)
   
B. q (Inverse of P)

   - two tail: q(1-alpha/2)
   - upper tail: q(1-alpha)
   - lower tail: q(alpha)
  
C. P-value integral

  - two tail: a = |Zscore|, b = 100
  - upper tail: a = Zscore, b = 100
  - lower tail: a = -100, b = Zscore





## **One Sample Z test**


An electrical firm manufactures light bulbs that have a lifetime that is approximately normally distributet with a mean of 800 hours and a standard deviation of 40 hours. Test the hypothesis that µ = 800 hours against the alternative, µ != 800 hours, if a random sample of 30 bulbs has an average life of 788 hours. Use a P-value in your answer.

```{r}
n = 30
miu = 800
xbar = 788
sd = 40

#HO : µ = 800
#H1 : µ != 800


# ------ Sample Z-Test Function ---
OneSample_Z <- function(xbar, miu, sigma, n, alpha = 0.05, type = "two.tail") {
  
  zscore <- (xbar - miu) / (sigma / sqrt(n))
  
  PDF_Normal <- function(x) {
    1/sqrt(2*pi) * exp(-x^2/2)
  }
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a) + f(b)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_Normal, abs(zscore), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_Normal, -100, zscore)
  } else if (type == "upper") {
    p_trap <- trap(PDF_Normal, zscore, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pnorm(abs(zscore)))
    crit_val <- qnorm(1 - alpha / 2)
  } else if (type == "lower") {
    p_builtin <- pnorm(zscore)
    crit_val <- qnorm(alpha)
  } else if (type == "upper") {
    p_builtin <- 1 - pnorm(zscore)
    crit_val <- qnorm(1 - alpha)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄)    =" ,round(xbar, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("Z-statistic        =", round(zscore, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical Z value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

OneSample_Z(xbar, miu, sd, n)
```
## **One Sample t test** 

Test the hypothesis that the average content of containers of a particular lubricant is 10 liters if the contents of a random sample of 10 containers are 10.2, 9.7, 10.1, 10.3, 10.1, 9.8, 9.9, 10.4, 10.3, and 9.8 liters. Use a 0.01 level of significance and assume that the distribution of contents is normal.

```{r}

data <- c(10.2, 9.7, 10.1, 10.3, 10.1, 9.8, 9.9, 10.4, 10.3, 9.8)
miu <- 10



OneSample_t <- function(x, miu, alpha = 0.01, type = "two.tail") {
  
  # --- define the xbar, s, etc ---
  n = length(x)
  x_bar = sum(x)/n
  s = sd(x)
  v = n-1
  
  # --- define the tscore and pdf ---
  
  t_score = (x_bar - miu) / (s/ sqrt(n))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄)    =" ,round(x_bar, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

OneSample_t(data, miu)

```
## **Two Sample Z-test**

A random sample of size n1 = 25, taken from a normal population with a standard deviation σ1 = 5.2, has a mean xbar1 = 81. A second random sample of size n2 = 36, taken from a different normal population with a standard deviation σ2 = 3.4, has a mean xbar2 = 76. Test the hypothesis that μ1 = μ2 against the alternative, μ1 != μ2. Quote a P-value in your conclusion

```{r}
n1 = 25
sd1 = 5.2
xbar1 = 81

n2 = 36
sd2 = 3.4
xbar2 = 76

# H0 : μ1 = μ2
# H1 : μ1 != μ2

TwoSample_Z <- function(xbar1, xbar2, sigma1, sigma2, n1, n2, alpha = 0.05, d0 = 0, type = "two.tail") {
  
  zscore <- (xbar1 - xbar2 - d0) / sqrt((sigma1^2/n1) + (sigma2^2/n2))
  
  PDF_Normal <- function(x) {
    1/sqrt(2*pi) * exp(-x^2/2)
  }
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a) + f(b)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_Normal, abs(zscore), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_Normal, -100, zscore)
  } else if (type == "upper") {
    p_trap <- trap(PDF_Normal, zscore, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pnorm(abs(zscore)))
    crit_val <- qnorm(1 - alpha / 2)
  } else if (type == "lower") {
    p_builtin <- pnorm(zscore)
    crit_val <- qnorm(alpha)
  } else if (type == "upper") {
    p_builtin <- 1 - pnorm(zscore)
    crit_val <- qnorm(1 - alpha)
  }
  
  
  # --- Output Results ---
  cat("=== Two Sample Z-Test ===\n")
  cat("Sample mean (x̄1)   =" ,round(xbar1, 4), "\n")
  cat("Sample mean (x̄2)   =" ,round(xbar2, 4), "\n")
  cat("Z-statistic        =", round(zscore, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical Z value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_Z(xbar1, xbar2, sd1, sd2, n1, n2)

```

## **Two sample t-test, variance is equal**

To find out whether a new serum will arrest leukemia, 9 mice, all with an advanced stage of the disease, are selected. Five mice receive the treatment and 4 do not. Survival times, in years, from the time the experiment commenced are as follows: 

Treatment = 2.1, 5.3, 1.4, 4.6, 0.9
No Treatment = 1.9, 0.5, 2.8, 3.1

At the 0.05 level of significance, can the serum be said to be effective? Assume the two population to be normally distributed with equal variances. 
```{r}
data1 <- c(2.1, 5.3, 1.4, 4.6, 0.9)
data2 <- c(1.9, 0.5, 2.8, 3.1)

# H0 : The serum is effective (μ1 = μ2)
# H1 : The serum is not effective

TwoSample_t_same <- function(x1, x2, alpha = 0.05, type = "two.tail", d0 = 0) {
  
  # --- define the xbar, s, etc ---
  n1 = length(x1)
  n2 = length (x2)
  x_bar1 = sum(x1)/n1
  x_bar2 = sum(x2)/n2
  s1 = sd(x1)
  s2 = sd(x2)
  v = (n1 + n2) - 2
  sp_2 = (((n1 - 1)*s1^2) + ((n2 - 1)*s2^2)) / v
  sp = sqrt(sp_2)
  
  # --- define the tscore and pdf ---
  
  t_score = (x_bar1 - x_bar2 - d0) / (sp*sqrt((1/n1)+(1/n2)))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄1)    =" ,round(x_bar1, 4), "\n")
  cat("Sample mean (x2)    =" ,round(x_bar2, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_t_same(data1, data2)
```

## **Two sample t-test, variance is not equal**

A study was conducted by the Department of Zoology at Virginia Tech to determine if there is a significant difference in the density of organisms at two different stations located on Cedar Run, a secondary stream in the Roanoke River drainage basin. Sewage from a sewage treatment plant and overflow from the Federal Mogul Corporation settling pond enter the stream near its headwaters. The following data give the density measurements, in number of organisms per square meter, at the two collecting stations

Station 1 = 5030, 13700, 10730, 11400, 860, 2200, 4250, 15040, 4980, 11910, 8130, 26850, 17660, 22800, 1130, 1690
Station 2 = 2800, 4670, 6890, 7720, 7030, 7330, 2810, 1330, 3320, 1230, 2130, 2190

Can we conclude, at the 0.05 level of significance, that the average densities at the two stations are equal? Assume that the observations come from normal populations with different variances.

```{r}
station1 <- c(5030, 13700, 10730, 11400, 860, 2200, 4250, 15040, 4980, 11910, 8130, 26850, 17660, 22800, 1130, 1690)
station2 <- c(2800, 4670, 6890, 7720, 7030, 7330, 2810, 1330, 3320, 1230, 2130, 2190)

TwoSample_t_diff <- function(x1, x2, alpha = 0.05, type = "two.tail", d0 = 0) {
  
  # --- define the xbar, s, etc ---
  n1 = length(x1)
  n2 = length (x2)
  x_bar1 = sum(x1)/n1
  x_bar2 = sum(x2)/n2
  s1 = sd(x1)
  s2 = sd(x2)
  a = ((s1^2/n1) + (s2^2/n2))^2
  b = (((s1^2/n1)^2)/(n1-1)) + (((s2^2/n2)^2)/(n2-1))
  v = a/b
  
  
  # --- define the tscore and pdf ---
  
  t_score = ((x_bar1 - x_bar2) - d0) / (sqrt((s1^2/n1) + (s2^2/n2)))
  
  PDF_t <- function(t, v) {
  (gamma((v+1)/2) / (sqrt(v*pi) * gamma(v/2))) * (1 + (t^2)/v)^(-(v+1)/2)
}
  
  # --- Trapezoidal function to find PDF ---
  trap <- function (f, a, b, n = 1000) {
      h = (b - a) / n
      integral = (f(a, v) + f(b, v)) / 2
      for (i in 1:(n-1)) {
        integral = integral + f(a + i * h, v)
      }
      
      integral = integral * h 
      return(integral)
  }
  
  
  # --- Manual integration using Trapezoidal ---
  if (type == "two.tail") {
    p_trap <- 2 * trap(PDF_t, abs(t_score), 100)
  } else if (type == "lower") {
    p_trap <- trap(PDF_t, -100, t_score)
  } else if (type == "upper") {
    p_trap <- trap(PDF_t, t_score, 100)
  }
  
  
  # --- Built-in pnorm comparison ---
  if (type == "two.tail") {
    p_builtin <- 2 * (1 - pt(abs(t_score),v))
    crit_val <- qt(1 - alpha / 2,v)
  } else if (type == "lower") {
    p_builtin <- pt(t_score, v)
    crit_val <- qt(alpha,v)
  } else if (type == "upper") {
    p_builtin <- 1 - pt(t_score,v)
    crit_val <- qt(1 - alpha,v)
  }
  
  
  # --- Output Results ---
  cat("=== One Sample Z-Test ===\n")
  cat("Sample mean (x̄1)    =" ,round(x_bar1, 4), "\n")
  cat("Sample mean (x2)    =" ,round(x_bar2, 4), "\n")
  cat("Population mean μ  =", miu, "\n")
  cat("t-statistic        =", round(t_score, 4), "\n")
  cat("p-value (Trap)     =", round(p_trap, 5), "\n")
  cat("p-value (pnorm)    =", round(p_builtin, 5), "\n")
  cat("alpha              =", alpha, "\n")
  cat("Critical t value   =", round(crit_val, 4), "\n\n")
  
  if (p_builtin < alpha) {
    cat(">>> Reject H0 (Significant)\n")
  } else {
    cat(">>> Fail to Reject H0 (Not Significant)\n")
  }
}

TwoSample_t_diff(station1, station2)
```

