This report presents a simulation exercise comparing the distributions of the sample mean and sample variance of 40 exponential random variables (rate lambda = 0.2, 1000 simulations) to their theoretical counterparts. It also includes a basic inferential data analysis of the ToothGrowth dataset.
# Load necessary libraries
library(ggplot2)
## Warning: 程序包'ggplot2'是用R版本4.4.3 来建造的
# Rate parameter of the exponential distribution; the theoretical mean used
# below is 1 / lambda.
lambda <- 0.2

# Simulate 1000 means, each computed from 40 exponential draws.
# The seed makes the simulation reproducible; vapply() evaluates the draws in
# the same order as an index loop, so the random stream is consumed identically.
set.seed(123)
mns <- vapply(
  seq_len(1000),
  function(i) mean(rexp(40, lambda)),
  numeric(1)
)

# Draw the histogram of the simulated means with the theoretical mean marked
# by a red vertical line. Wrapped in a function so the same figure can be
# rendered on the active device and again into the PNG file below.
plot_sample_means <- function() {
  hist(mns, main = "Distribution of Sample Means", xlab = "Sample Mean",
       col = "lightblue", border = "black")
  abline(v = 1 / lambda, col = "red", lwd = 2) # Theoretical mean
}
plot_sample_means()

# Save the plot. ggsave() writes ggplot objects (by default the last ggplot
# displayed), so it cannot capture a base-graphics histogram; render the
# figure into a png() device instead (7 x 5 in at 300 dpi).
png("sample_means_distribution.png", width = 7, height = 5, units = "in",
    res = 300)
plot_sample_means()
invisible(dev.off()) # invisible() suppresses the printed device name
# Simulate 1000 sample variances, each computed from a fresh sample of
# 40 exponential draws with rate lambda.
vars <- vapply(
  seq_len(1000),
  function(i) var(rexp(40, lambda)),
  numeric(1)
)

# Draw the histogram of the simulated variances with the theoretical variance,
# (1 / lambda)^2, marked by a red vertical line. Wrapped in a function so the
# same figure can be rendered on the active device and again into the PNG file.
plot_sample_variances <- function() {
  hist(vars, main = "Distribution of Sample Variances",
       xlab = "Sample Variance", col = "lightgreen", border = "black")
  abline(v = (1 / lambda)^2, col = "red", lwd = 2) # Theoretical variance
}
plot_sample_variances()

# Save the plot. ggsave() writes ggplot objects (by default the last ggplot
# displayed), so it cannot capture a base-graphics histogram; render the
# figure into a png() device instead (7 x 5 in at 300 dpi).
png("sample_variances_distribution.png", width = 7, height = 5, units = "in",
    res = 300)
plot_sample_variances()
invisible(dev.off()) # invisible() suppresses the printed device name
# Report the average of the simulated means next to the theoretical mean
print(
  paste("Sample Mean:", mean(mns), sep = " ")
)
## [1] "Sample Mean: 5.01191127862178"
print(
  paste("Theoretical Mean:", 1 / lambda, sep = " ")
)
## [1] "Theoretical Mean: 5"
# Report the average of the simulated variances next to the theoretical variance
print(
  paste("Sample Variance:", mean(vars), sep = " ")
)
## [1] "Sample Variance: 24.7931154965223"
print(
  paste("Theoretical Variance:", (1 / lambda)^2, sep = " ")
)
## [1] "Theoretical Variance: 25"
# Bring the ToothGrowth data frame into the workspace
data(list = "ToothGrowth")
# Print per-column summary statistics for the data
summary(object = ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
# Basic exploratory data analysis: tooth length against dose, coloured by
# supplement type. Dose is plotted on the x-axis and length on the y-axis so
# that the data agree with the "Dose" / "Tooth Length" axis labels.
# Wrapped in a function so the same figure can be rendered on the active
# device and again into the PNG file below.
plot_tooth_growth <- function() {
  plot(ToothGrowth$dose, ToothGrowth$len, col = ToothGrowth$supp, pch = 19,
       xlab = "Dose", ylab = "Tooth Length")
  # One legend entry per supplement level, using colours 1, 2, ... in level
  # order. Placed top-left to stay clear of the long teeth at the highest dose.
  legend("topleft", legend = levels(ToothGrowth$supp),
         col = seq_along(levels(ToothGrowth$supp)), pch = 19)
}
plot_tooth_growth()

# Save the plot. ggsave() writes ggplot objects (by default the last ggplot
# displayed), so it cannot capture a base-graphics scatterplot; render the
# figure into a png() device instead (7 x 5 in at 300 dpi).
png("toothgrowth_scatterplot.png", width = 7, height = 5, units = "in",
    res = 300)
plot_tooth_growth()
invisible(dev.off()) # invisible() suppresses the printed device name
# Fit tooth length as an additive linear function of dose and supplement type
model <- lm(
  formula = len ~ dose + supp,
  data = ToothGrowth
)
# Show the coefficient table and overall fit statistics
summary(object = model)
##
## Call:
## lm(formula = len ~ dose + supp, data = ToothGrowth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.600 -3.700 0.373 2.116 8.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.2725 1.2824 7.231 1.31e-09 ***
## dose 9.7636 0.8768 11.135 6.31e-16 ***
## suppVC -3.7000 1.0936 -3.383 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.236 on 57 degrees of freedom
## Multiple R-squared: 0.7038, Adjusted R-squared: 0.6934
## F-statistic: 67.72 on 2 and 57 DF, p-value: 8.716e-16
# 95% confidence intervals for every coefficient of the fitted model
confint(object = model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 6.704608 11.840392
## dose 8.007741 11.519402
## suppVC -5.889905 -1.510095
# Two-sided Welch t-test: does mean tooth length differ between supplements?
t.test(
  len ~ supp,
  data = ToothGrowth,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: len by supp
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means between group OJ and group VC is not equal to 0
## 95 percent confidence interval:
## -0.1710156 7.5710156
## sample estimates:
## mean in group OJ mean in group VC
## 20.66333 16.96333