Roger Guevara — May 21, 2014, 7:58 AM
#El error estándar es la desviación estándar de un conjunto de medias
#Creamos un objeto con un solo elemento, por ejemplo cero, o creamos un objeto numérico vacío con la función numeric()
PROMEDIO <- 0
PROMEDIO
[1] 0
PROMEDIO <- numeric()
PROMEDIO
numeric(0)
# Simulate the sampling distribution of the mean: draw 10000 samples of 15
# observations each from a normal population with mean 20 and standard
# deviation 2, and store every sample's mean (PROMEDIO) and its estimated
# standard error (EE).
# Both result vectors are allocated at their final length up front so they
# are not regrown inside the loop.
N_SIM <- 10000
N_MUESTRA <- 15
PROMEDIO <- numeric(N_SIM)
EE <- numeric(N_SIM)
for (i in seq_len(N_SIM)) {
  MUESTRA <- rnorm(N_MUESTRA, mean = 20, sd = 2)
  PROMEDIO[i] <- mean(MUESTRA)
  # Standard error of the mean estimated from this single sample.
  EE[i] <- sd(MUESTRA) / sqrt(N_MUESTRA)
}
i
[1] 10000
PROMEDIO[1:20]
[1] 19.90 21.94 20.38 20.09 19.91 19.81 19.23 20.26 19.24 19.18 20.20
[12] 20.64 21.55 19.76 20.66 20.43 20.29 19.61 20.02 20.94
PROMEDIO[9980:10000]
[1] 20.17 18.99 20.47 19.44 19.80 19.80 20.19 19.32 20.42 19.48 21.15
[12] 20.43 20.06 19.85 20.14 20.26 19.77 20.81 19.05 19.31 20.90
length(PROMEDIO)
[1] 10000
sd(PROMEDIO)
[1] 0.5137
sd(rnorm(15, mean = 20, sd = 2))/sqrt(15)
[1] 0.4351
# Histogram of the per-sample standard-error estimates stored in EE; the red
# vertical line marks the standard deviation of the sample means (PROMEDIO)
# so the two quantities can be compared.
hist(EE)
abline(v=sd(PROMEDIO), col="red", lwd =4)
pnorm(0, 0, 1)
[1] 0.5
qnorm(0.025, 0, 1)
[1] -1.96
qnorm(0.975, 0, 1)
[1] 1.96
#source("c:\\...\\...\\funciones.r")
source("~/desktop/cursos R/2013/funciones.r")
# Confidence intervals built with the normal quantile 1.96: for each of 100
# samples of 15 draws from N(mean = 20, sd = 2), draw the interval
# mean -/+ 1.96 * ee(sample) as a horizontal blue segment at height i.
# The red vertical line marks the population mean (20).
# ee() is not defined in this file; presumably it is loaded from funciones.r
# and returns the standard error of the mean -- TODO confirm.
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, col = "red", lwd = 3)
for (i in seq_len(100)) {
  MUESTRA <- rnorm(n = 15, mean = 20, sd = 2)
  EE <- ee(MUESTRA)
  PROMEDIO <- mean(MUESTRA)
  LS <- PROMEDIO + 1.96 * EE
  LI <- PROMEDIO - 1.96 * EE
  lines(x = c(LI, LS), y = c(i, i), col = "blue", lwd = 2)
}
# Confidence intervals built with the Student-t critical value: for each of
# 100 samples of 15 draws from N(mean = 20, sd = 2), draw the interval
# mean -/+ T_CRIT * ee(sample) as a horizontal blue segment at height i.
# The red vertical line marks the population mean (20).
# The critical value is stored as T_CRIT rather than T: assigning to T would
# overwrite R's built-in shorthand for TRUE for the rest of the session.
# ee() is not defined in this file; presumably it is loaded from funciones.r
# and returns the standard error of the mean -- TODO confirm.
N_MUESTRA <- 15
T_CRIT <- qt(0.975, df = N_MUESTRA - 1)
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, lwd = 3, col = "red")
for (i in seq_len(100)) {
  MUESTRA <- rnorm(N_MUESTRA, mean = 20, sd = 2)
  PROMEDIO <- mean(MUESTRA)
  EE <- ee(MUESTRA)
  LI <- PROMEDIO - T_CRIT * EE
  LS <- PROMEDIO + T_CRIT * EE
  lines(c(LI, LS), c(i, i), lwd = 2, col = "blue")
}
#T
# One fresh sample of 15 draws from N(mean = 20, sd = 2); the last line prints
# the ratio of the sample mean to ee(MUESTRA).
# ee() is not defined in this file; presumably it is loaded from funciones.r
# and returns the standard error of the mean, which would make the ratio a
# one-sample t statistic against a hypothesised mean of 0 -- TODO confirm.
MUESTRA <- rnorm(15, mean = 20, sd = 2)
EE <- ee(MUESTRA)
PROMEDIO <- mean(MUESTRA)
PROMEDIO/EE
[1] 29.01
1 - pt(50.45, 14)
[1] 0
# Upper-tail area beyond t = 50.45 on 14 df, requested directly with
# lower.tail = FALSE (argument spelled out in full instead of relying on
# partial matching). Unlike 1 - pt(...), this does not round to 0.
pt(50.45, 14, lower.tail = FALSE)
[1] 1.539e-17
# Two-sample t statistic computed by hand for two independent samples of 15
# draws each, from N(mean = 20, sd = 2) and N(mean = 26, sd = 2).
MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 26, sd = 2)
MEDIA1 <- mean(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR1 <- var(MUESTRA1)
VAR2 <- var(MUESTRA2)
# Sample sizes are read from the data instead of repeating the literal 15, so
# the formula stays correct if the sample sizes above are changed.
N1 <- length(MUESTRA1)
N2 <- length(MUESTRA2)
# Difference in means divided by the standard error of that difference.
(MEDIA1 - MEDIA2) / sqrt(VAR1 / N1 + VAR2 / N2)
[1] -7.544
# Lower-tail area below t = -6.222044 on 28 df, with lower.tail spelled out in
# full instead of relying on partial argument matching.
# NOTE(review): -6.222044 is typed in by hand and differs from the statistic
# printed just above (-7.544); presumably it comes from an earlier random draw.
pt(-6.222044, 28, lower.tail = TRUE)
[1] 5.045e-07
# Two-sample t statistic computed by hand for two independent samples of 15
# draws each, from N(mean = 20, sd = 2) and N(mean = 22, sd = 5).
MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 22, sd = 5)
MEDIA1 <- mean(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR1 <- var(MUESTRA1)
VAR2 <- var(MUESTRA2)
# Sample sizes are read from the data instead of repeating the literal 15, so
# the formula stays correct if the sample sizes above are changed.
N1 <- length(MUESTRA1)
N2 <- length(MUESTRA2)
# Difference in means divided by the standard error of that difference.
(MEDIA1 - MEDIA2) / sqrt(VAR1 / N1 + VAR2 / N2)
[1] -1.711
# Upper-tail area beyond t = 2.327505 on 28 df, with lower.tail spelled out in
# full instead of relying on partial argument matching.
# NOTE(review): 2.327505 is typed in by hand and differs from the statistic
# printed just above (-1.711); presumably it comes from an earlier random draw.
pt(2.327505, 28, lower.tail = FALSE)
[1] 0.0137
t.test(MUESTRA2, MUESTRA1,var.equal=TRUE)
Two Sample t-test
data: MUESTRA2 and MUESTRA1
t = 1.712, df = 28, p-value = 0.09805
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.4687 5.2305
sample estimates:
mean of x mean of y
21.83 19.45
# Two-sided p-value: twice the upper-tail area beyond t = 4.7637 on 20 df.
# lower.tail is spelled out in full instead of relying on partial matching.
pt(4.7637, 20, lower.tail = FALSE) * 2
[1] 0.0001186
#PAREADA
# Read the comma-separated file with a header row into DATOS.
DATOS <- read.table("~/desktop/cursos R/2014/paired.csv", sep = ",", header = TRUE)
# Copy the columns into explicit variables instead of attach()-ing the data
# frame: attach() would leave DATOS on the search path for the rest of the
# session, where its columns can be silently masked by same-named objects.
Location <- DATOS[["Location"]]
Upstream <- DATOS[["Upstream"]]
Downstream <- DATOS[["Downstream"]]
names(DATOS)
[1] "Location" "Upstream" "Downstream"
t.test(Upstream, Downstream)
Welch Two Sample t-test
data: Upstream and Downstream
t = 1.109, df = 15.59, p-value = 0.2845
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.09472 0.30138
sample estimates:
mean of x mean of y
5.749 5.646
t.test(Upstream-Downstream)
One Sample t-test
data: Upstream - Downstream
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
0.02589 0.18077
sample estimates:
mean of x
0.1033
DIF <- Upstream-Downstream
t.test(DIF)
One Sample t-test
data: DIF
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
0.02589 0.18077
sample estimates:
mean of x
0.1033
# Boxplot of the paired differences (DIF); the dashed horizontal line at 0
# is the reference for "no difference".
par(cex=1)
boxplot(DIF)
abline(h=0, lty = 2)
t.test(Upstream, Downstream, paired = TRUE)
Paired t-test
data: Upstream and Downstream
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.02589 0.18077
sample estimates:
mean of the differences
0.1033
wilcox.test(Upstream, Downstream, paired = TRUE)
Wilcoxon signed rank test
data: Upstream and Downstream
V = 44, p-value = 0.007812
alternative hypothesis: true location shift is not equal to 0
VARIANZA1 <- var(Upstream)
VARIANZA2 <- var(Downstream)
VARIANZA2/VARIANZA1
[1] 1.389
# Upper-tail area beyond F = 1.389224 (the variance ratio printed above) on
# 8 and 8 df, with lower.tail spelled out in full instead of relying on
# partial argument matching. This is a one-sided probability; the p-value
# reported by var.test() further down (0.6529) is about twice this value.
pf(1.389224, 8, 8, lower.tail = FALSE)
[1] 0.3265
# Draw 10000 random values from an F distribution with 8 and 8 degrees of
# freedom, plot their estimated density, and mark the value 1.389224 with a
# red vertical line.
ALEAT.F <- rf(10000, 8, 8)
plot(density(ALEAT.F))
abline(v=1.389224, col ="red")
var.test(Downstream, Upstream)
F test to compare two variances
data: Downstream and Upstream
F = 1.389, num df = 8, denom df = 8, p-value = 0.6529
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.3134 6.1588
sample estimates:
ratio of variances
1.389
#
9 * 0.5^8 * 0.5^1
[1] 0.01758
0.5^8 * 0.5^1
[1] 0.001953
factorial(9)/(factorial(8)*factorial(1))
[1] 9
#
# 3 x 3 table of counts, filled column by column. Row and column labels are
# supplied at construction so the printed results are easier to read.
M <- matrix(
  c(50, 8, 10, 25, 20, 5, 2, 1, 12),
  nrow = 3,
  ncol = 3,
  dimnames = list(
    c("Helechos", "Orqid.", "Bromelias"),
    c("Palmas", "Árboles", "Postes")
  )
)
# Chi-squared test on the table of counts; the result is kept so that its
# components (e.g. the residuals) can be inspected afterwards.
TEST <- chisq.test(M)
Warning: Chi-squared approximation may be incorrect
TEST$residuals
Palmas Árboles Postes
Helechos 1.694 -0.7337 -2.268
Orqid. -1.773 2.7553 -1.256
Bromelias -1.024 -1.6166 5.132
colSums(M)/133
Palmas Árboles Postes
0.5113 0.3759 0.1128
rowSums(M)/133
Helechos Orqid. Bromelias
0.5789 0.2180 0.2030
(colSums(M)/133) * (rowSums(M)/133)[1] *133
Palmas Árboles Postes
39.368 28.947 8.684
(colSums(M)/133) * (rowSums(M)/133)[2] *133
Palmas Árboles Postes
14.827 10.902 3.271
(colSums(M)/133) * (rowSums(M)/133)[3] * 133
Palmas Árboles Postes
13.805 10.150 3.045
sum(colSums(M))
[1] 133
sum(rowSums(M))
[1] 133