#referencia :http://www.cyclismo.org/tutorial/R/hwI.html
# En este ejemplo examinaremos la data correspondiente
# al monoxido de carbono emitido por 46 diferentes motores
# Primero transformaremos la data para acercarla a lo que
# seria una distribucion normal. Luego calcularemos los
# limites de confianza del average (mean) y realizaremos
# una prueba de significancia para evaluar si la data
# esta cercana o alejada a un estandar prefijado. Finalmente
# encontraremos el "power of the test" para detectar una
# una diferencia fija del estandar. Asumiremos limites
# de confianza del 95%
engine<- read.csv("C:/Users/Luis/Desktop/case_study/table_7_3.csv")
names(engine)
## [1] "en" "hc" "co" "nox"
summary(engine)
## en hc co nox
## Min. : 1.00 Min. :0.3400 Min. : 1.850 Min. :0.490
## 1st Qu.:12.75 1st Qu.:0.4375 1st Qu.: 4.388 1st Qu.:1.110
## Median :24.50 Median :0.5100 Median : 5.905 Median :1.315
## Mean :24.00 Mean :0.5502 Mean : 7.879 Mean :1.340
## 3rd Qu.:35.25 3rd Qu.:0.6025 3rd Qu.:10.015 3rd Qu.:1.495
## Max. :46.00 Max. :1.1000 Max. :23.530 Max. :2.940
hist(engine$co)

qqnorm(engine$co, main = "Carbon Monoxide")
qqline(engine$co)

boxplot(engine$co,main="Carbon Monoxide")

# transformando la data
lenengine <- log(engine$co)
boxplot(lenengine,main="Log Carbon Moxide")

hist(lenengine,main = "Log Carbon Monoxide")

qqnorm(lenengine,main = "QQ plot for log of CO")
qqline(lenengine)

# Los limites de confianza
m <- mean(lenengine)
s <- sd(lenengine)
n <- length(lenengine)
se <- s/sqrt(n)
# ?qt
error <- se*qt(0.975,df=n-1)
error
## [1] 0.1737529
left <- m-error
right <- m+error
left
## [1] 1.709925
right
## [1] 2.057431
exp(left)# limite de confianza por la izquierda
## [1] 5.528548
exp(right)# limite de confianza por la derecha
## [1] 7.82584
# Test de significancia
# asumimos que el verdadero promedio (mean) es igual a 5.4
lNULL <- left-error
rNULL <- left+error
lNULL
## [1] 1.536172
rNULL
## [1] 1.883678
m
## [1] 1.883678
log(5.4)-error
## [1] 1.512646
log(5.4)+error
## [1] 1.860152
m
## [1] 1.883678
pvalue <- 2*(1-pt((m-log(5.4))/se,df=n-1))
pvalue#Since the p-value is 2.7% which is less than 5% we can reject the null hypothesis
## [1] 0.02692539
# an alternative
# El power of test
# manera No.1
tLeft <- (lNULL-log(7))/(s/sqrt(n))
tRight <- (rNULL-log(7))/(s/sqrt(n))
p <- pt(tRight,df=n-1) - pt(tLeft,df=n-1)
p
## [1] 0.237373
1-p
## [1] 0.762627
# manera No.2
t <- qt(0.975,df=n-1)
shift <- (log(5.4)-log(7))/(s/sqrt(n))
pt(t,df=n-1,ncp=shift)-pt(-t,df=n-1,ncp=shift)
## [1] 0.1628579
1-(pt(t,df=n-1,ncp=shift)-pt(-t,df=n-1,ncp=shift))
## [1] 0.8371421
# Manera No.3
t.test(lenengine,mu=log(5.4),alternative = "two.sided")
##
## One Sample t-test
##
## data: lenengine
## t = 2.2841, df = 47, p-value = 0.02693
## alternative hypothesis: true mean is not equal to 1.686399
## 95 percent confidence interval:
## 1.709925 2.057431
## sample estimates:
## mean of x
## 1.883678
power.t.test(n=n,delta=log(7)-log(5.4),sd=s,sig.level=0.05,
type="one.sample",alternative="two.sided",strict = TRUE)
##
## One-sample t test power calculation
##
## n = 48
## delta = 0.2595112
## sd = 0.5983851
## sig.level = 0.05
## power = 0.8371421
## alternative = two.sided
# La probabilidad de cometer un error tipo II es II error
# es aprox. 16.3%, y la probabilidad de detectar una diferencia si
# el nivel es realmente 7 es aprox. 83.7%.