# R Markdown

# PROBLEM 1

# Problem 1: assess normality of 36 measurements with a histogram, a
# hand-built normal QQ plot, and R's built-in qqnorm()/qqline().
data <- c(26.4, 23.5, 25.4, 22.9, 25.2, 39.2, 25.5, 31.9, 26.0, 44.6, 35.5, 38.6,
          30.1, 31.0, 30.8, 32.8, 47.7, 39.1, 55.3, 50.7, 73.8, 71.1, 68.4, 77.1,
          19.4, 19.3, 18.7, 19.0, 23.2, 21.3, 23.2, 19.9, 18.9, 19.8, 19.6, 21.9)

sample <- data
hist(sample)

# ---- Manual QQ plot ----
sample.sort <- sort(sample)        # order statistics, ascending
size <- length(sample.sort)        # number of observations

# Position 1..size of each order statistic. seq_along() is used rather than
# rank() because rank() averages tied values (23.2 occurs twice here), which
# would give both tied observations the same plotting position.
rank <- seq_along(sample.sort)

p <- (rank - 0.5) / size           # cumulative probability of each position
z.quantile <- qnorm(p)             # standard normal quantiles at those probabilities

# Scatterplot of x = Z quantiles against y = sorted data, with a
# least-squares reference line through the points.
plot(x = z.quantile, y = sample.sort, pch = 16, main = "QQ Plot")
abline(lm(sample.sort ~ z.quantile))

# Question: What pattern do the points show? Compare with qqnorm() and qqline().

# ---- Built-in QQ plot ----
qqnorm(sample, main = "QQ Plot (Using qqnorm)")
# By default qqline() draws the line through the first and third quartiles,
# so it is not the same line as the least-squares fit above.
qqline(sample, col = "red", lwd = 2)

# Conclusion: The data do not follow a normal distribution closely; the QQ plot shows noticeable deviations in the tails. The distribution is right-skewed, meaning it has a heavier right tail.
# PROBLEM 2

sample <- c(
  26.4, 23.5, 25.4, 22.9, 25.2, 39.2, 25.5, 31.9, 26.0, 44.6, 35.5, 38.6,
  30.1, 31.0, 30.8, 32.8, 47.7, 39.1, 55.3, 50.7, 73.8, 71.1, 68.4, 77.1,
  19.4, 19.3, 18.7, 19.0, 23.2, 21.3, 23.2, 19.9, 18.9, 19.8, 19.6, 21.9
)

# Build the text "<label> <lower> to <upper>" with both endpoints rounded
# to three decimals.
describe_interval <- function(label, interval) {
  paste(label, paste(round(interval, 3), collapse = " to "))
}

# (a) 95% confidence interval for the population mean (t interval).

alpha <- 0.05                      # 1 - confidence level
n <- length(sample)                # number of observations
df <- n - 1                        # degrees of freedom
x_bar <- mean(sample)              # sample mean
s <- sd(sample)                    # sample standard deviation
t_value <- qt(1 - alpha / 2, df)   # two-sided critical value of t(df)

# Half-width of the interval: critical value times the standard error.
margin_of_error <- t_value * s / sqrt(n)

# Interval is x_bar -/+ margin_of_error, stored as (lower, upper).
CI_mean <- x_bar + c(-1, 1) * margin_of_error
print(describe_interval("CI for the mean ", CI_mean))
## [1] "CI for the mean  28.083 to 39.517"
# What assumption about the population does this work require, supposing the sample is random?

# --> Answer: The population is large or infinite, and the observations are independent and identically distributed (iid). The population is usually also assumed to follow a normal distribution, especially for smaller sample sizes.

# (b) 95% confidence interval for the population variance.

s2 <- var(sample)
# Upper-tail quantile first: dividing by the larger quantile gives the
# lower bound, dividing by the smaller quantile gives the upper bound.
chi_sq_quantiles <- qchisq(c(1 - alpha / 2, alpha / 2), df)

CI_variance <- (df * s2) / chi_sq_quantiles
print(describe_interval("95%- CI for the population  variance ", CI_variance))
## [1] "95%- CI for the population  variance  187.834 to 485.838"
# (c) 95% confidence interval for the population standard deviation.

# Square root of the variance interval's endpoints.
CI_std <- sqrt(CI_variance)
print(describe_interval("95%- CI for the population standard deviation ", CI_std))
## [1] "95%- CI for the population standard deviation  13.705 to 22.042"
# PROBLEM 3 

library(faraway)
## Warning: package 'faraway' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
data(stat500, package = "faraway")
head(stat500)
##   midterm final   hw total
## 1    24.5  26.0 28.5  79.0
## 2    22.5  24.5 28.2  75.2
## 3    23.5  26.5 28.3  78.3
## 4    23.5  34.5 29.2  87.2
## 5    22.5  30.5 27.3  80.3
## 6    16.0  31.0 27.5  74.5
dim(stat500)
## [1] 55  4

# ---- Centering matrix ----
# C = I - (1/n) * 1 1^T; multiplying a data matrix by C on the left
# subtracts each column's mean from that column.
n <- nrow(stat500)            # number of observations
ones <- rep(1, times = n)     # vector of ones
Id <- diag(n)                 # n x n identity
M <- ones %*% t(ones)         # n x n matrix of ones
C <- Id - M / n

# ---- Covariance matrix ----
# Data matrix with columns midterm, final, hw (the total column is left out).
# The columns are bound without names, so the results print with [,1]..[,3].
X <- cbind(stat500[["midterm"]], stat500[["final"]], stat500[["hw"]])

# Sample covariance: (CX)^T (CX) / (n - 1).
CX <- C %*% X                 # column-centered data
scatter <- t(CX) %*% CX       # cross-products of the centered columns
Cov_X <- scatter / (n - 1)

print(Cov_X)
##           [,1]     [,2]      [,3]
## [1,] 22.994108 12.95202  5.219192
## [2,] 12.952020 24.54158  1.730960
## [3,]  5.219192  1.73096 16.005468

# ---- Correlation matrix ----
# D is diagonal with 1 / (column standard deviation); D Cov_X D rescales
# the covariance matrix into the correlation matrix.
col_sd <- apply(X, MARGIN = 2, FUN = sd)
D <- diag(1 / col_sd)

Cor_X <- D %*% Cov_X %*% D

print(Cor_X)
##           [,1]       [,2]       [,3]
## [1,] 1.0000000 0.54522775 0.27205756
## [2,] 0.5452277 1.00000000 0.08733764
## [3,] 0.2720576 0.08733764 1.00000000