This is my final project for the statistical inference course. The project has two parts:
As part of this project, I will be investigating the exponential distribution in R and comparing it with the CLT (central limit theorem). The R function rexp(n, lambda) simulates the explonential distribution. In that function, lanbda is the rate parameter. The mean and the standard deviation of the exponential distiribution is 1/lambda. One thousand simulations were conducted with lambda set at 0.2, ivestigating the distribution of averages of 40 exponentials.
set.seed(1250)
n <- 40 #number of exponentials
Sims <- 1000 #number of simulations
Lambda <- 0.2
rsim <- replicate(Sims, rexp(n, Lambda)) #the simulations
SampleMean <- mean(rsim)
msim <- apply(rsim, 2, mean) #mean of expoential simulations
hist(msim, breaks=40, xlim=c(2,9), main="Simulations Exponential Function Distribution", col=blues9)
hist(msim, col="green4", main="Sample vs. Theoretical Means Comparison", breaks=20)
abline(v=mean(msim), lwd="4", col="orange3")
text(3.4, 225, paste("Sample mean = ", round(mean(msim),2), "\n Theoretical mean = 5"), col="purple4")
print(paste("Sample variance: ", round(sd(msim)^2 ,4)))
## [1] "Sample variance: 0.6416"
print(paste("Theoretical variance: ", round( ((1/Lambda)/sqrt(n))^2 ,4)))
## [1] "Theoretical variance: 0.625"
hist(msim, prob=TRUE, col=blues9, main="Exponential Mean Distribution Assessment", breaks=n)
x <- seq(min(msim), max(msim), length = 100)
lines(x, dnorm(x, mean = 1/Lambda, sd = (1/Lambda/sqrt(n))), pch = 25, col = "orange3")
qqnorm(msim)
qqline(msim, col = "gray")
library(datasets)
data(ToothGrowth)
library(ggplot2)
library(RColorBrewer)
library(wesanderson)
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
summary(ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
ggplot(data=ToothGrowth, aes(x=as.factor(dose), y=len, fill=supp)) +
geom_bar(stat="identity") +
scale_fill_manual(values=wes_palette(n=2, name="Zissou1")) +
facet_grid(. ~ supp) +
xlab("Dose(mg)") +
ylab("Tooth length")
hypoth1 <- t.test(len ~ supp, data = ToothGrowth)
hypoth1$conf.int
## [1] -0.1710156 7.5710156
## attr(,"conf.level")
## [1] 0.95
hypoth1$p.value
## [1] 0.06063451
hypoth2<-t.test(len ~ supp, data = subset(ToothGrowth, dose == 0.5))
hypoth2$conf.int
## [1] 1.719057 8.780943
## attr(,"conf.level")
## [1] 0.95
hypoth2$p.value
## [1] 0.006358607
hypoth3<-t.test(len ~ supp, data = subset(ToothGrowth, dose == 1))
hypoth3$conf.int
## [1] 2.802148 9.057852
## attr(,"conf.level")
## [1] 0.95
hypoth3$p.value
## [1] 0.001038376
hypoth4<-t.test(len ~ supp, data = subset(ToothGrowth, dose == 2))
hypoth4$conf.int
## [1] -3.79807 3.63807
## attr(,"conf.level")
## [1] 0.95
hypoth4$p.value
## [1] 0.9638516