# Problem 1

# Monte Carlo simulation illustrating the CLT for sample means drawn from
# Beta(alpha, beta = 1): for every (alpha, n) pair, simulate M sample means,
# plot their histogram, and overlay the normal density implied by the CLT.
set.seed(123)  # fixed seed so the simulated means are reproducible

M <- 1000                           # number of Monte Carlo replicates
n_values <- c(5, 30, 100)           # given sample sizes
alpha_values <- c(0.05, 0.5, 1, 5)  # given shape parameter alpha
beta <- 1                           # second shape parameter, held fixed

# One panel per (alpha, n) combination: rows follow alpha, columns follow n.
# par() returns the previous settings; they are restored after the loops so
# that later plots are not drawn into this multi-panel layout.
old_par <- par(mfrow = c(length(alpha_values), length(n_values)),
               mar = c(3, 3, 2, 1))

for (alpha in alpha_values) {
  for (n in n_values) {
    # Sample mean of each of the M replicates of size n. The draws happen in
    # the same order as an explicit loop, so results are unchanged for the seed.
    means <- vapply(
      seq_len(M),
      function(i) mean(rbeta(n, alpha, beta)),
      numeric(1)
    )

    # CLT approximation: the sample mean is approximately normal with the
    # Beta mean and with standard deviation sqrt(Beta variance / n).
    mean_theoretical <- alpha / (alpha + beta)
    var_theoretical <- (alpha * beta) / ((alpha + beta)^2 * (alpha + beta + 1))
    sd_theoretical <- sqrt(var_theoretical / n)

    # Histogram on the density scale with the normal approximation overlaid
    hist(means, probability = TRUE,
         main = paste("α =", alpha, ", n =", n),
         xlab = "Sample mean", col = "lightblue", border = "white")
    curve(dnorm(x, mean_theoretical, sd_theoretical),
          col = "red", lwd = 2, add = TRUE)
  }
}

# Restore the graphics parameters that were in effect before this section
par(old_par)

#Problem 3

data(sleep)
head(sleep)
##   extra group ID
## 1   0.7     1  1
## 2  -1.6     1  2
## 3  -0.2     1  3
## 4  -1.2     1  4
## 5  -0.1     1  5
## 6   3.4     1  6
# View() opens a data viewer window; only call it in an interactive session
# so the script still runs under Rscript / knitting.
if (interactive()) {
  View(sleep)
}

# Cohen's d: standardized difference between the group 2 ("treatment") and
# group 1 ("control") means of `extra`, using the pooled standard deviation.
control <- sleep$extra[sleep$group == 1]
treatment <- sleep$extra[sleep$group == 2]

mean_control <- mean(control)
mean_treatment <- mean(treatment)

# Pooled SD weighted by each group's degrees of freedom. With equal group
# sizes this reduces to sqrt((var(control) + var(treatment)) / 2).
n_control <- length(control)
n_treatment <- length(treatment)
pooled_sd <- sqrt(
  ((n_control - 1) * var(control) + (n_treatment - 1) * var(treatment)) /
    (n_control + n_treatment - 2)
)

cohens_d <- (mean_treatment - mean_control) / pooled_sd
cohens_d
## [1] 0.8321811
# Pearson correlation coefficient between tree age and height (Loblolly data)
data(Loblolly)
head(Loblolly)
##    height age Seed
## 1    4.51   3  301
## 15  10.89   5  301
## 29  28.72  10  301
## 43  41.74  15  301
## 57  52.70  20  301
## 71  60.92  25  301
# View() opens a data viewer window; only call it in an interactive session
# so the script still runs under Rscript / knitting.
if (interactive()) {
  View(Loblolly)
}
# "pearson" is cor()'s default method; spelled out to match the stated intent
correlation <- cor(Loblolly$age, Loblolly$height, method = "pearson")
correlation
## [1] 0.9899132
# Odds ratio of survival, females vs. males (Titanic data)
data(Titanic)
Titanic
## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20
# Contingency table: sum over Class and Age, keeping Sex (dimension 2) as rows
# and Survived (dimension 4) as columns.
titanic_table <- apply(Titanic, c(2, 4), sum)
# Total counts per survival status with both sexes added together
# (not used in the odds ratio below).
gender_survival <- titanic_table["Male", ] + titanic_table["Female", ]

# From the contingency table: Male died: 1364, Male survived: 367,
# Female died: 126, Female survived: 344.
# The odds are read from the table rather than typed in by hand, so they
# cannot drift out of sync with the data.
odds_male <- titanic_table["Male", "Yes"] / titanic_table["Male", "No"]
odds_female <- titanic_table["Female", "Yes"] / titanic_table["Female", "No"]

# Odds of survival for females relative to males
odds_ratio <- odds_female / odds_male
odds_ratio
## [1] 10.14697