#referencia :http://www.cyclismo.org/tutorial/R/hwI.html
# En este ejemplo examinaremos la data correspondiente
# al monoxido de carbono emitido por 46 diferentes motores
# Primero transformaremos la data para acercarla a lo que
# seria una distribucion normal. Luego calcularemos los
# limites de confianza del average (mean) y realizaremos 
# una prueba de significancia para evaluar si la data 
# esta cercana o alejada a un estandar prefijado. Finalmente
# encontraremos el "power of the test" para detectar una
# una diferencia fija del estandar. Asumiremos limites
# de confianza del 95%
engine<- read.csv("C:/Users/Luis/Desktop/case_study/table_7_3.csv")
names(engine)
## [1] "en"  "hc"  "co"  "nox"
summary(engine)
##        en              hc               co              nox       
##  Min.   : 1.00   Min.   :0.3400   Min.   : 1.850   Min.   :0.490  
##  1st Qu.:12.75   1st Qu.:0.4375   1st Qu.: 4.388   1st Qu.:1.110  
##  Median :24.50   Median :0.5100   Median : 5.905   Median :1.315  
##  Mean   :24.00   Mean   :0.5502   Mean   : 7.879   Mean   :1.340  
##  3rd Qu.:35.25   3rd Qu.:0.6025   3rd Qu.:10.015   3rd Qu.:1.495  
##  Max.   :46.00   Max.   :1.1000   Max.   :23.530   Max.   :2.940
hist(engine$co)

qqnorm(engine$co, main = "Carbon Monoxide")
qqline(engine$co)

boxplot(engine$co,main="Carbon Monoxide")

# transformando la data
lenengine <- log(engine$co)
boxplot(lenengine,main="Log Carbon Moxide")

hist(lenengine,main = "Log Carbon Monoxide")

qqnorm(lenengine,main = "QQ plot for log of CO")
qqline(lenengine)

# Los limites de confianza
m <- mean(lenengine)
s <- sd(lenengine)
n <- length(lenengine)
se <- s/sqrt(n)
# ?qt
error <- se*qt(0.975,df=n-1)
error
## [1] 0.1737529
left <- m-error
right <- m+error
left
## [1] 1.709925
right
## [1] 2.057431
exp(left)# limite de confianza por la izquierda
## [1] 5.528548
exp(right)# limite de confianza por la derecha
## [1] 7.82584
# Test de significancia
# asumimos que el verdadero promedio (mean) es igual a 5.4

lNULL <- left-error
rNULL <- left+error
lNULL
## [1] 1.536172
rNULL
## [1] 1.883678
m
## [1] 1.883678
log(5.4)-error
## [1] 1.512646
log(5.4)+error
## [1] 1.860152
m
## [1] 1.883678
pvalue <- 2*(1-pt((m-log(5.4))/se,df=n-1))
pvalue#Since the p-value is 2.7% which is less than 5% we can reject the null hypothesis
## [1] 0.02692539
# an alternative
# El power of test
# manera No.1
tLeft <- (lNULL-log(7))/(s/sqrt(n))
tRight <- (rNULL-log(7))/(s/sqrt(n))
p <- pt(tRight,df=n-1) - pt(tLeft,df=n-1)
p
## [1] 0.237373
1-p
## [1] 0.762627
# manera No.2
t <- qt(0.975,df=n-1)
shift <- (log(5.4)-log(7))/(s/sqrt(n))
pt(t,df=n-1,ncp=shift)-pt(-t,df=n-1,ncp=shift)
## [1] 0.1628579
1-(pt(t,df=n-1,ncp=shift)-pt(-t,df=n-1,ncp=shift))
## [1] 0.8371421
# Manera No.3
t.test(lenengine,mu=log(5.4),alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  lenengine
## t = 2.2841, df = 47, p-value = 0.02693
## alternative hypothesis: true mean is not equal to 1.686399
## 95 percent confidence interval:
##  1.709925 2.057431
## sample estimates:
## mean of x 
##  1.883678
power.t.test(n=n,delta=log(7)-log(5.4),sd=s,sig.level=0.05,
type="one.sample",alternative="two.sided",strict = TRUE)
## 
##      One-sample t test power calculation 
## 
##               n = 48
##           delta = 0.2595112
##              sd = 0.5983851
##       sig.level = 0.05
##           power = 0.8371421
##     alternative = two.sided
# La probabilidad de cometer un error tipo II es II error
# es aprox. 16.3%, y la probabilidad de detectar una diferencia si
# el nivel es realmente 7 es aprox. 83.7%.