#' Simulate sample means of Uniform(0, 1) draws
#'
#' Draws `n` independent samples of size `m` from the Uniform(0, 1)
#' distribution and returns the mean of each sample.
#'
#' @param n Number of sample means to generate; a single non-negative number.
#' @param m Number of uniform draws averaged per mean; a single non-negative
#'   number.
#' @return A numeric vector of length `n` (`numeric(0)` when `n` is 0).
#' @examples
#' CTL_unif(n = 5, m = 10)
CTL_unif <- function(n, m) {
  # Reject non-scalar input up front: runif() would otherwise silently treat a
  # longer `m` as "draw length(m) values".
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0,
    is.numeric(m), length(m) == 1L, !is.na(m), m >= 0
  )
  # vapply() pins the return type to double. replicate() simplifies through
  # sapply() and returns an empty list when n is 0.
  vapply(
    seq_len(n),
    function(i) mean(runif(m, min = 0, max = 1)),
    numeric(1)
  )
}
# Example: five sample means, each averaged over 10 Uniform(0, 1) draws.
# The seed is only set further down, so the values printed here vary by run.
CTL_unif(n = 5, m = 10)
## [1] 0.4966503 0.3859125 0.4009755 0.4927957 0.4963499

# Fix the RNG state so the simulations below are reproducible
set.seed(123)

# 500 sample means for each sample size (m = 30, then m = 200)
data_30 <- CTL_unif(n = 500, m = 30)
data_200 <- CTL_unif(n = 500, m = 200)

# Normal curves suggested by the CLT: mean 1/2 and sd sqrt(1 / (12 * m)),
# evaluated on one shared grid of 100 points
x_vals <- seq(from = 0.3, to = 0.7, length.out = 100)
y_30 <- dnorm(x_vals, mean = 0.5, sd = sqrt(1 / (12 * 30)))
y_200 <- dnorm(x_vals, mean = 0.5, sd = sqrt(1 / (12 * 200)))

# Two panels in one row: density-scaled histogram with its normal curve on top
par(mfrow = c(1, 2))

hist(
  data_30,
  freq = FALSE,
  xlim = c(0.3, 0.7),
  main = "Histogram of Means (m=30)",
  xlab = "Sample Means"
)
lines(x = x_vals, y = y_30, lwd = 2, col = "blue")

hist(
  data_200,
  freq = FALSE,
  xlim = c(0.3, 0.7),
  main = "Histogram of Means (m=200)",
  xlab = "Sample Means"
)
lines(x = x_vals, y = y_200, lwd = 2, col = "red")
# As m increases from 30 to 200, the distribution of the sample means becomes
# more concentrated around 0.5 (its standard deviation shrinks like
# 1 / sqrt(12 * m)) and aligns more closely with the normal distribution
# predicted by the Central Limit Theorem.
# Load the built-in airquality data set
data("airquality")

# Proportion of Ozone readings that are missing: the mean of a logical vector
# is the share of TRUE values
mean(is.na(airquality[["Ozone"]]))
## [1] 0.2418301

# Average Ozone over the observed (non-missing) readings only
mean(airquality[["Ozone"]], na.rm = TRUE)
## [1] 42.12931
# Keep only the complete cases (rows without a single NA)
airquality_clean <- na.omit(airquality)

# Drop the calendar columns; every other column is kept in its original order
airquality_clean <- airquality_clean[
  ,
  setdiff(names(airquality_clean), c("Month", "Day"))
]

# Per-column summary of the cleaned measurements
summary(airquality_clean)
## Ozone Solar.R Wind Temp
## Min. : 1.0 Min. : 7.0 Min. : 2.30 Min. :57.00
## 1st Qu.: 18.0 1st Qu.:113.5 1st Qu.: 7.40 1st Qu.:71.00
## Median : 31.0 Median :207.0 Median : 9.70 Median :79.00
## Mean : 42.1 Mean :184.8 Mean : 9.94 Mean :77.79
## 3rd Qu.: 62.0 3rd Qu.:255.5 3rd Qu.:11.50 3rd Qu.:84.50
## Max. :168.0 Max. :334.0 Max. :20.70 Max. :97.00
# One boxplot per column of the cleaned data, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
invisible(lapply(
  names(airquality_clean),
  function(var_name) {
    # The column name doubles as the panel title and the x-axis label
    boxplot(airquality_clean[[var_name]], main = var_name, xlab = var_name)
  }
))
# Correlation of Temp with Ozone on the raw scale ...
cor_ozone_temp <- with(airquality_clean, cor(Ozone, Temp))
# ... and with Ozone on the square-root scale
cor_sqrt_ozone_temp <- with(airquality_clean, cor(sqrt(Ozone), Temp))

# Scatter plots of both relationships, side by side
par(mfrow = c(1, 2))
with(
  airquality_clean,
  plot(
    Ozone, Temp,
    main = "Ozone vs Temp",
    xlab = "Ozone",
    ylab = "Temp",
    col = "blue"
  )
)
with(
  airquality_clean,
  plot(
    sqrt(Ozone), Temp,
    main = "sqrt(Ozone) vs Temp",
    xlab = "sqrt(Ozone)",
    ylab = "Temp",
    col = "red"
  )
)