# R Markdown
# PROBLEM 1
# Check the sample for normality with a histogram and a normal QQ plot,
# first built by hand and then with qqnorm() / qqline().
# NOTE: `data`, `sample` and `rank` shadow base R functions of the same
# name; the names are kept so any code relying on them keeps working.
data <- c(
  26.4, 23.5, 25.4, 22.9, 25.2, 39.2, 25.5, 31.9, 26.0, 44.6, 35.5, 38.6,
  30.1, 31.0, 30.8, 32.8, 47.7, 39.1, 55.3, 50.7, 73.8, 71.1, 68.4, 77.1,
  19.4, 19.3, 18.7, 19.0, 23.2, 21.3, 23.2, 19.9, 18.9, 19.8, 19.6, 21.9
)
sample <- data
hist(sample)

# Manual QQ plot ----
sample.sort <- sort(sample) # order statistics, increasing
size <- length(sample.sort) # number of observations
# Position of each order statistic: 1, 2, ..., size.
# seq_along() is used instead of rank() because rank() assigns tied values
# (23.2 occurs twice) the same averaged rank, which would give two points
# the same cumulative probability and hence the same theoretical quantile.
rank <- seq_along(sample.sort)
p <- (rank - 0.5) / size # cumulative probability of each order statistic
z.quantile <- qnorm(p) # standard normal quantiles at those probabilities
# Scatterplot of x = Z quantiles against y = sorted data
plot(x = z.quantile, y = sample.sort, pch = 16, main = "QQ Plot")
abline(lm(sample.sort ~ z.quantile)) # least-squares reference line

# Question: Observe the pattern of the points. Try qqnorm(); qqline()
# Generate the QQ plot with the built-in helpers
qqnorm(sample, main = "QQ Plot (Using qqnorm)")
qqline(sample, col = "red", lwd = 2) # adds a reference line

# Conclusion: the data do not follow a normal distribution closely; the
# points deviate noticeably from the reference line in the tails. The
# sample is right-skewed, i.e. it has a heavier right tail.
# PROBLEM 2
# 95% confidence intervals for the population mean, variance and standard
# deviation, computed from the sample below.
sample <- c(
  26.4, 23.5, 25.4, 22.9, 25.2, 39.2, 25.5, 31.9, 26.0, 44.6, 35.5, 38.6,
  30.1, 31.0, 30.8, 32.8, 47.7, 39.1, 55.3, 50.7, 73.8, 71.1, 68.4, 77.1,
  19.4, 19.3, 18.7, 19.0, 23.2, 21.3, 23.2, 19.9, 18.9, 19.8, 19.6, 21.9
)

# Render a pair of interval bounds as "lower to upper", rounded to 3 decimals.
format_interval <- function(bounds) {
  paste(round(bounds, 3), collapse = " to ")
}

# (a) Write a 95%-CI for the population mean.
alpha <- 0.05 # significance level
n <- length(sample) # sample size
df <- n - 1 # degrees of freedom
x_bar <- mean(sample) # sample mean
s <- sd(sample) # sample standard deviation
t_value <- qt(1 - alpha / 2, df) # two-sided critical t-value
# Half-width of the interval: critical value times the standard error
margin_of_error <- t_value * s / sqrt(n)
# Interval is x_bar -/+ margin_of_error
CI_mean <- x_bar + c(-1, 1) * margin_of_error
print(paste("CI for the mean ", format_interval(CI_mean)))
## [1] "CI for the mean 28.083 to 39.517"
# Question: what must be assumed about the population for this work to be
# valid, supposing the sample is random?
# --> Answer: the population is large (effectively infinite) relative to the
# sample, and the observations are independent and identically distributed
# (iid). The population is usually also assumed to follow a normal
# distribution, which matters most for smaller sample sizes.

# (b) Write a 95%-CI for the population variance
s2 <- var(sample) # sample variance
# The larger chi-square quantile yields the LOWER variance bound and the
# smaller quantile yields the UPPER bound, hence the names.
chi_sq_lower <- qchisq(1 - alpha / 2, df)
chi_sq_upper <- qchisq(alpha / 2, df)
CI_variance <- (df * s2) / c(chi_sq_lower, chi_sq_upper)
print(paste("95%- CI for the population variance ", format_interval(CI_variance)))
## [1] "95%- CI for the population variance 187.834 to 485.838"

# (c) Write a 95%- CI for the population standard deviation.
# Square roots of the variance bounds
CI_std <- sqrt(CI_variance)
print(paste("95%- CI for the population standard deviation ", format_interval(CI_std)))
## [1] "95%- CI for the population standard deviation 13.705 to 22.042"
# PROBLEM 3
# Build the sample covariance and correlation matrices of the midterm,
# final and hw scores in stat500 using explicit matrix algebra.
library(faraway)
## Warning: package 'faraway' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
data(stat500, package = "faraway")
head(stat500)
## midterm final hw total
## 1 24.5 26.0 28.5 79.0
## 2 22.5 24.5 28.2 75.2
## 3 23.5 26.5 28.3 78.3
## 4 23.5 34.5 29.2 87.2
## 5 22.5 30.5 27.3 80.3
## 6 16.0 31.0 27.5 74.5
dim(stat500)
## [1] 55 4

# Centering matrix ----
n <- nrow(stat500) # sample size
ones <- rep(1, n) # vector of ones
M <- ones %*% t(ones) # n x n matrix with every entry equal to 1
Id <- diag(n) # n x n identity
# C = I - (1/n) * M; multiplying by C subtracts each column's mean
C <- Id - M / n

# Covariance matrix ----
# Data matrix of the three score columns, without dimnames
score_cols <- c("midterm", "final", "hw")
X <- unname(as.matrix(stat500[, score_cols]))
CX <- C %*% X # centered data matrix
# (CX)^T (CX) / (n - 1)
Cov_X <- t(CX) %*% CX / (n - 1)
# Display Covariance Matrix
print(Cov_X)
## [,1] [,2] [,3]
## [1,] 22.994108 12.95202 5.219192
## [2,] 12.952020 24.54158 1.730960
## [3,] 5.219192 1.73096 16.005468

# Correlation matrix ----
# D is the diagonal matrix of inverse column standard deviations
col_sd <- apply(X, 2, sd)
D <- diag(1 / col_sd)
# Scale the covariance matrix on both sides: D * Cov_X * D
Cor_X <- D %*% Cov_X %*% D
# Display Correlation Matrix
print(Cor_X)
## [,1] [,2] [,3]
## [1,] 1.0000000 0.54522775 0.27205756
## [2,] 0.5452277 1.00000000 0.08733764
## [3,] 0.2720576 0.08733764 1.00000000