Clase 2.R

Roger Guevara — May 21, 2014, 7:58 AM

#El error estándar es la desviación estándar de un conjunto de medias

#Creamos un objeto con un solo elemento, por ejemplo cero, o creamos un objeto numérico vacío con la función numeric()
PROMEDIO <- 0
PROMEDIO
[1] 0

PROMEDIO <- numeric()
PROMEDIO
numeric(0)

EE  <- numeric()

#Usamos un loop para extraer, en cada una de diez mil iteraciones, una muestra de 15 elementos a partir de una población con media 20 y desviación estándar 2. Almacenamos en los objetos PROMEDIO y EE cada uno de los promedios y errores estándar calculados de cada una de las muestras.

# Simulate the sampling distribution of the mean: draw N_SIM samples of
# N_OBS observations each from a Normal(mean = 20, sd = 2) population and
# store, per sample, its mean (PROMEDIO) and its estimated standard error
# sd / sqrt(n) (EE).
N_SIM <- 10000
N_OBS <- 15
# Preallocate the result vectors so they are not regrown (and copied) on
# every one of the N_SIM iterations.
PROMEDIO <- numeric(N_SIM)
EE <- numeric(N_SIM)
for (i in seq_len(N_SIM)) {
  MUESTRA <- rnorm(N_OBS, mean = 20, sd = 2)
  PROMEDIO[i] <- mean(MUESTRA)
  EE[i] <- sd(MUESTRA) / sqrt(N_OBS)
}

i
[1] 10000

PROMEDIO[1:20]
 [1] 19.90 21.94 20.38 20.09 19.91 19.81 19.23 20.26 19.24 19.18 20.20
[12] 20.64 21.55 19.76 20.66 20.43 20.29 19.61 20.02 20.94
PROMEDIO[9980:10000]
 [1] 20.17 18.99 20.47 19.44 19.80 19.80 20.19 19.32 20.42 19.48 21.15
[12] 20.43 20.06 19.85 20.14 20.26 19.77 20.81 19.05 19.31 20.90
length(PROMEDIO)
[1] 10000

sd(PROMEDIO)
[1] 0.5137
sd(rnorm(15, mean = 20, sd = 2))/sqrt(15)
[1] 0.4351
hist(EE)
abline(v=sd(PROMEDIO), col="red", lwd =4)

plot of chunk unnamed-chunk-1



pnorm(0, 0, 1)
[1] 0.5
qnorm(0.025, 0, 1)
[1] -1.96
qnorm(0.975, 0, 1)
[1] 1.96

#source("c:\\...\\...\\funciones.r")

source("~/desktop/cursos R/2013/funciones.r")

# Draw 100 simulated confidence intervals, one horizontal bar per sample,
# using the fixed multiplier 1.96 on each side of the sample mean.
# The red vertical line marks the population mean (20) the samples are
# drawn from.
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, col = "red", lwd = 3)
for (i in seq_len(100)) {
  MUESTRA <- rnorm(15, mean = 20, sd = 2)
  PROMEDIO <- mean(MUESTRA)
  # ee() is not defined in this file; presumably it is provided by the
  # sourced funciones.r and returns the standard error -- TODO confirm.
  EE <- ee(MUESTRA)
  LI <- PROMEDIO - 1.96 * EE
  LS <- PROMEDIO + 1.96 * EE
  # One bar from the lower to the upper limit at height i.
  segments(x0 = LI, y0 = i, x1 = LS, y1 = i, lwd = 2, col = "blue")
}

plot of chunk unnamed-chunk-1



# Draw 100 simulated 95% confidence intervals, one horizontal bar per
# sample, using the Student t quantile for n - 1 degrees of freedom as the
# multiplier. The red vertical line marks the population mean (20).
N_OBS <- 15
# Named T_CRIT rather than T: assigning to T would overwrite R's built-in
# alias for TRUE for the rest of the session.
T_CRIT <- qt(0.975, df = N_OBS - 1)
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, lwd = 3, col = "red")
for (i in seq_len(100)) {
  MUESTRA <- rnorm(N_OBS, mean = 20, sd = 2)
  PROMEDIO <- mean(MUESTRA)
  # ee() is not defined in this file; presumably it is provided by the
  # sourced funciones.r and returns the standard error -- TODO confirm.
  EE <- ee(MUESTRA)
  LI <- PROMEDIO - T_CRIT * EE
  LS <- PROMEDIO + T_CRIT * EE
  lines(c(LI, LS), c(i, i), lwd = 2, col = "blue")
}

plot of chunk unnamed-chunk-1




#T
MUESTRA <- rnorm(15, mean = 20, sd = 2)
EE <- ee(MUESTRA)
PROMEDIO <- mean(MUESTRA)

PROMEDIO/EE
[1] 29.01

1 - pt(50.45, 14)
[1] 0
# Upper-tail probability requested directly through the fully spelled-out
# `lower.tail` argument (no partial argument matching). This keeps precision
# for extreme statistics, where computing 1 - pt(...) rounds to exactly 0.
pt(50.45, 14, lower.tail = FALSE)
[1] 1.539e-17


MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 26, sd = 2)

MEDIA1 <- mean(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR1 <- var(MUESTRA1)
VAR2 <- var(MUESTRA2)

(MEDIA1 - MEDIA2)/  sqrt(VAR1/15 + VAR2/15)
[1] -7.544

# Lower-tail probability of the t distribution with 28 degrees of freedom;
# `lower.tail` is spelled out instead of relying on partial matching.
pt(-6.222044, 28, lower.tail = TRUE)
[1] 5.045e-07


MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 22, sd = 5)

MEDIA1 <- mean(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR1 <- var(MUESTRA1)
VAR2 <- var(MUESTRA2)

(MEDIA1 - MEDIA2)/  sqrt(VAR1/15 + VAR2/15)
[1] -1.711

# Upper-tail probability of the t distribution with 28 degrees of freedom;
# `lower.tail` is spelled out instead of relying on partial matching.
pt(2.327505, 28, lower.tail = FALSE)
[1] 0.0137

t.test(MUESTRA2, MUESTRA1,var.equal=TRUE)

    Two Sample t-test

data:  MUESTRA2 and MUESTRA1 
t = 1.712, df = 28, p-value = 0.09805
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.4687  5.2305 
sample estimates:
mean of x mean of y 
    21.83     19.45 



# Two-sided p-value: twice the upper-tail probability of the t distribution
# with 20 degrees of freedom; `lower.tail` is spelled out in full.
pt(4.7637, 20, lower.tail = FALSE) * 2
[1] 0.0001186


#PAREADA
# Read the comma-separated file (with a header row) into a data frame.
DATOS <- read.table("~/desktop/cursos R/2014/paired.csv", sep = ",", header = TRUE)
# Expose the columns as ordinary variables instead of attach()-ing the data
# frame: attach() adds a copy to the search path that can mask, or be masked
# by, other objects and does not follow later changes to DATOS.
# [[ ]] is used so that no partial name matching can occur.
Location <- DATOS[["Location"]]
Upstream <- DATOS[["Upstream"]]
Downstream <- DATOS[["Downstream"]]
names(DATOS)
[1] "Location"   "Upstream"   "Downstream"

t.test(Upstream, Downstream)

    Welch Two Sample t-test

data:  Upstream and Downstream 
t = 1.109, df = 15.59, p-value = 0.2845
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.09472  0.30138 
sample estimates:
mean of x mean of y 
    5.749     5.646 

t.test(Upstream-Downstream)

    One Sample t-test

data:  Upstream - Downstream 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of x 
   0.1033 

DIF <- Upstream-Downstream
t.test(DIF)

    One Sample t-test

data:  DIF 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of x 
   0.1033 
par(cex=1)
boxplot(DIF)
abline(h=0, lty = 2)

plot of chunk unnamed-chunk-1


t.test(Upstream, Downstream, paired = TRUE)

    Paired t-test

data:  Upstream and Downstream 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of the differences 
                 0.1033 

wilcox.test(Upstream, Downstream, paired = TRUE)

    Wilcoxon signed rank test

data:  Upstream and Downstream 
V = 44, p-value = 0.007812
alternative hypothesis: true location shift is not equal to 0 

VARIANZA1 <- var(Upstream)
VARIANZA2 <- var(Downstream)

VARIANZA2/VARIANZA1
[1] 1.389
# Upper-tail (one-sided) probability of the F distribution with 8 and 8
# degrees of freedom; `lower.tail` is spelled out instead of relying on
# partial matching.
pf(1.389224, 8, 8, lower.tail = FALSE)
[1] 0.3265

ALEAT.F <- rf(10000, 8, 8)
plot(density(ALEAT.F))
abline(v=1.389224, col ="red")

plot of chunk unnamed-chunk-1

var.test(Downstream, Upstream)

    F test to compare two variances

data:  Downstream and Upstream 
F = 1.389, num df = 8, denom df = 8, p-value = 0.6529
alternative hypothesis: true ratio of variances is not equal to 1 
95 percent confidence interval:
 0.3134 6.1588 
sample estimates:
ratio of variances 
             1.389 



#
9 * 0.5^8 * 0.5^1
[1] 0.01758
0.5^8 * 0.5^1
[1] 0.001953

factorial(9)/(factorial(8)*factorial(1))
[1] 9



#

# 3 x 3 table of counts, filled column by column. Row and column names are
# attached at construction so that cells are easy to identify.
M <- matrix(
  c(50, 8, 10, 25, 20, 5, 2, 1, 12),
  nrow = 3,
  ncol = 3,
  dimnames = list(
    c("Helechos", "Orqid.", "Bromelias"),
    c("Palmas", "Árboles", "Postes")
  )
)

# Pearson chi-squared test on the table; the result is kept in TEST so its
# components (e.g. the residuals) can be inspected afterwards.
TEST <- chisq.test(M)
Warning: Chi-squared approximation may be incorrect
TEST$residuals
          Palmas Árboles Postes
Helechos   1.694 -0.7337 -2.268
Orqid.    -1.773  2.7553 -1.256
Bromelias -1.024 -1.6166  5.132
colSums(M)/133
 Palmas Árboles  Postes 
 0.5113  0.3759  0.1128 
rowSums(M)/133
 Helechos    Orqid. Bromelias 
   0.5789    0.2180    0.2030 

(colSums(M)/133) * (rowSums(M)/133)[1] *133
 Palmas Árboles  Postes 
 39.368  28.947   8.684 
(colSums(M)/133) * (rowSums(M)/133)[2] *133
 Palmas Árboles  Postes 
 14.827  10.902   3.271 
(colSums(M)/133) * (rowSums(M)/133)[3] * 133
 Palmas Árboles  Postes 
 13.805  10.150   3.045 

sum(colSums(M))
[1] 133
sum(rowSums(M))
[1] 133