Clase 2.R

Roger Guevara — May 21, 2014, 7:58 AM

#El error estándar es la desviación estándar de un conjunto de medias

#Creamos un objeto con un solo elemento, por ejemplo cero, o creamos un objeto numérico vacío con la función numeric()
PROMEDIO <- 0
PROMEDIO

[1] 0


PROMEDIO <- numeric()
PROMEDIO

numeric(0)


EE  <- numeric()

#Usamos un loop para extraer, en cada una de diez mil iteraciones, una muestra de 15 elementos a partir de una población con media 20 y desviación estándar 2. Almacenamos en los objetos PROMEDIO y EE cada uno de los promedios y errores estándar calculados de cada una de las muestras.

# Simulate the sampling distribution of the mean: draw N_SIM samples of size
# N_MUESTRA from a normal population (mean 20, sd 2) and store, for every
# sample, its mean (PROMEDIO) and its estimated standard error (EE).
N_SIM <- 10000
N_MUESTRA <- 15

# Preallocate the result vectors at their final length so the loop fills them
# in place instead of growing them one element per iteration. This also makes
# the block independent of how PROMEDIO and EE were initialised earlier.
PROMEDIO <- numeric(N_SIM)
EE <- numeric(N_SIM)

for (i in seq_len(N_SIM)) {
  MUESTRA <- rnorm(N_MUESTRA, mean = 20, sd = 2)
  PROMEDIO[i] <- mean(MUESTRA)
  # Standard error of the mean estimated from this single sample.
  EE[i] <- sd(MUESTRA) / sqrt(N_MUESTRA)
}

i

[1] 10000


PROMEDIO[1:20]

 [1] 19.90 21.94 20.38 20.09 19.91 19.81 19.23 20.26 19.24 19.18 20.20
[12] 20.64 21.55 19.76 20.66 20.43 20.29 19.61 20.02 20.94

PROMEDIO[9980:10000]

 [1] 20.17 18.99 20.47 19.44 19.80 19.80 20.19 19.32 20.42 19.48 21.15
[12] 20.43 20.06 19.85 20.14 20.26 19.77 20.81 19.05 19.31 20.90

length(PROMEDIO)

[1] 10000


sd(PROMEDIO)

[1] 0.5137

sd(rnorm(15, mean = 20, sd = 2))/sqrt(15)

[1] 0.4351

hist(EE)
abline(v=sd(PROMEDIO), col="red", lwd =4)

plot of chunk unnamed-chunk-1



pnorm(0, 0, 1)

[1] 0.5

qnorm(0.025, 0, 1)

[1] -1.96

qnorm(0.975, 0, 1)

[1] 1.96


# Load the course helper functions from a local file.
# Windows-style path template, kept commented out as an alternative:
#source("c:\\...\\...\\funciones.r")

# NOTE(review): this path is specific to the author's machine; the ee() helper
# called further down is presumably defined in this file -- confirm.
source("~/desktop/cursos R/2013/funciones.r")

# Empty canvas (x from 17 to 22, y from 0 to 100) with a red vertical
# reference line at 20, the mean of the simulated population.
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, lwd = 3, col = "red")

# Draw 100 intervals of the form mean -/+ 1.96 * EE, one per simulated sample
# of 15 values, each as a blue horizontal segment at height i.
for (i in seq_len(100)) {
  MUESTRA <- rnorm(15, mean = 20, sd = 2)
  PROMEDIO <- mean(MUESTRA)
  # ee() is not defined in this script; presumably the standard-error helper
  # loaded with source() -- confirm.
  EE <- ee(MUESTRA)
  LI <- PROMEDIO - 1.96 * EE
  LS <- PROMEDIO + 1.96 * EE
  lines(x = c(LI, LS), y = rep(i, 2), lwd = 2, col = "blue")
}

plot of chunk unnamed-chunk-1



# Same interval plot as the 1.96-based version, but using the 0.975 quantile
# of Student's t distribution with 14 degrees of freedom as the multiplier.
#
# The critical value is stored in T_CRIT rather than T: in R, T is the
# built-in shorthand for TRUE, and assigning to it silently changes the meaning
# of any later code that uses T as a logical.
T_CRIT <- qt(0.975, df = 14)

# Empty canvas with a red vertical reference line at 20, the population mean.
plot(c(17, 22), c(0, 100), type = "n")
abline(v = 20, lwd = 3, col = "red")

# One blue horizontal segment per simulated sample of 15 values, at height i.
for (i in seq_len(100)) {
  MUESTRA <- rnorm(15, mean = 20, sd = 2)
  PROMEDIO <- mean(MUESTRA)
  # ee() is not defined in this script; presumably the standard-error helper
  # loaded with source() -- confirm.
  EE <- ee(MUESTRA)
  LI <- PROMEDIO - T_CRIT * EE
  LS <- PROMEDIO + T_CRIT * EE
  lines(c(LI, LS), c(i, i), lwd = 2, col = "blue")
}

plot of chunk unnamed-chunk-1




# t statistic: ratio of the mean of one simulated sample (15 values from a
# normal population with mean 20 and sd 2) to its standard error.
MUESTRA <- rnorm(15, mean = 20, sd = 2)
PROMEDIO <- mean(MUESTRA)
# ee() is not defined in this script; presumably the standard-error helper
# loaded with source() -- confirm.
EE <- ee(MUESTRA)

PROMEDIO / EE

[1] 29.01


1 - pt(50.45, 14)

[1] 0

# Upper-tail probability of t = 50.45 with 14 degrees of freedom, requested
# directly from pt(). Unlike 1 - pt(50.45, 14), which prints as 0, this keeps
# the very small tail area. The argument name is written in full
# (lower.tail) instead of relying on partial matching of `lower`.
pt(50.45, 14, lower.tail = FALSE)

[1] 1.539e-17



# Two simulated samples of 15 values each, drawn from normal populations with
# the same sd (2) and different means (20 and 26).
MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 26, sd = 2)

# Mean and variance of each sample, grouped per sample.
MEDIA1 <- mean(MUESTRA1)
VAR1 <- var(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR2 <- var(MUESTRA2)

# Difference in means divided by the square root of the summed
# variance-over-n terms (n = 15 for both samples).
(MEDIA1 - MEDIA2) / sqrt(VAR1 / 15 + VAR2 / 15)

[1] -7.544


# Lower-tail probability of t = -6.222044 with 28 degrees of freedom.
# NOTE(review): the statistic is typed in as a literal (apparently from an
# earlier run), so it does not track the value computed from the current
# random samples.
# The argument name is written in full instead of the partial match `lower`.
pt(-6.222044, 28, lower.tail = TRUE)

[1] 5.045e-07



# Two simulated samples of 15 values each, this time from normal populations
# that differ in both mean (20 vs 22) and sd (2 vs 5).
MUESTRA1 <- rnorm(15, mean = 20, sd = 2)
MUESTRA2 <- rnorm(15, mean = 22, sd = 5)

# Mean and variance of each sample, grouped per sample.
MEDIA1 <- mean(MUESTRA1)
VAR1 <- var(MUESTRA1)
MEDIA2 <- mean(MUESTRA2)
VAR2 <- var(MUESTRA2)

# Difference in means divided by the square root of the summed
# variance-over-n terms (n = 15 for both samples).
(MEDIA1 - MEDIA2) / sqrt(VAR1 / 15 + VAR2 / 15)

[1] -1.711


# Upper-tail probability of t = 2.327505 with 28 degrees of freedom.
# NOTE(review): the statistic is typed in as a literal (apparently from an
# earlier run), so it does not track the value computed from the current
# random samples.
# The argument name is written in full instead of the partial match `lower`.
pt(2.327505, 28, lower.tail = FALSE)

[1] 0.0137


t.test(MUESTRA2, MUESTRA1,var.equal=TRUE)


    Two Sample t-test

data:  MUESTRA2 and MUESTRA1 
t = 1.712, df = 28, p-value = 0.09805
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.4687  5.2305 
sample estimates:
mean of x mean of y 
    21.83     19.45




# Two-sided p-value for t = 4.7637 with 20 degrees of freedom: the upper-tail
# area doubled. The argument name is written in full instead of the partial
# match `lower`.
pt(4.7637, 20, lower.tail = FALSE) * 2

[1] 0.0001186



# Paired comparison: read the comma-separated data file with a header row.
DATOS <- read.table("~/desktop/cursos R/2014/paired.csv", sep = ",", header = TRUE)

# Expose the columns as ordinary variables instead of calling attach(DATOS).
# attach() adds the data frame to the search path, where its names can mask
# or be masked by other objects and remain until detach() is called.
# The bare column names used by later statements resolve exactly as before.
Location <- DATOS[["Location"]]
Upstream <- DATOS[["Upstream"]]
Downstream <- DATOS[["Downstream"]]

names(DATOS)

[1] "Location"   "Upstream"   "Downstream"


t.test(Upstream, Downstream)


    Welch Two Sample t-test

data:  Upstream and Downstream 
t = 1.109, df = 15.59, p-value = 0.2845
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -0.09472  0.30138 
sample estimates:
mean of x mean of y 
    5.749     5.646


t.test(Upstream-Downstream)


    One Sample t-test

data:  Upstream - Downstream 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of x 
   0.1033


DIF <- Upstream-Downstream
t.test(DIF)


    One Sample t-test

data:  DIF 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of x 
   0.1033

par(cex=1)
boxplot(DIF)
abline(h=0, lty = 2)

plot of chunk unnamed-chunk-1


t.test(Upstream, Downstream, paired = TRUE)


    Paired t-test

data:  Upstream and Downstream 
t = 3.077, df = 8, p-value = 0.01519
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 0.02589 0.18077 
sample estimates:
mean of the differences 
                 0.1033


wilcox.test(Upstream, Downstream, paired = TRUE)


    Wilcoxon signed rank test

data:  Upstream and Downstream 
V = 44, p-value = 0.007812
alternative hypothesis: true location shift is not equal to 0


VARIANZA1 <- var(Upstream)
VARIANZA2 <- var(Downstream)

VARIANZA2/VARIANZA1

[1] 1.389

# Upper-tail probability of the variance ratio 1.389224 under an F
# distribution with 8 and 8 degrees of freedom (a one-sided area).
# The argument name is written in full instead of the partial match `lower`.
pf(1.389224, 8, 8, lower.tail = FALSE)

[1] 0.3265


ALEAT.F <- rf(10000, 8, 8)
plot(density(ALEAT.F))
abline(v=1.389224, col ="red")

plot of chunk unnamed-chunk-1

var.test(Downstream, Upstream)


    F test to compare two variances

data:  Downstream and Upstream 
F = 1.389, num df = 8, denom df = 8, p-value = 0.6529
alternative hypothesis: true ratio of variances is not equal to 1 
95 percent confidence interval:
 0.3134 6.1588 
sample estimates:
ratio of variances 
             1.389




#
9 * 0.5^8 * 0.5^1

[1] 0.01758

0.5^8 * 0.5^1

[1] 0.001953


factorial(9)/(factorial(8)*factorial(1))

[1] 9




#

# 3 x 3 table of counts. The values fill the matrix column by column, and the
# row and column labels are supplied at construction time so that rows and
# columns are easy to identify.
M <- matrix(
  c(50, 8, 10, 25, 20, 5, 2, 1, 12),
  nrow = 3,
  ncol = 3,
  dimnames = list(
    c("Helechos", "Orqid.", "Bromelias"),
    c("Palmas", "Árboles", "Postes")
  )
)

# Chi-squared test on the table; the result is kept so its components
# (e.g. the residuals) can be inspected afterwards.
TEST <- chisq.test(M)

Warning: Chi-squared approximation may be incorrect

TEST$residuals

          Palmas Árboles Postes
Helechos   1.694 -0.7337 -2.268
Orqid.    -1.773  2.7553 -1.256
Bromelias -1.024 -1.6166  5.132

colSums(M)/133

 Palmas Árboles  Postes 
 0.5113  0.3759  0.1128

rowSums(M)/133

 Helechos    Orqid. Bromelias 
   0.5789    0.2180    0.2030


(colSums(M)/133) * (rowSums(M)/133)[1] *133

 Palmas Árboles  Postes 
 39.368  28.947   8.684

(colSums(M)/133) * (rowSums(M)/133)[2] *133

 Palmas Árboles  Postes 
 14.827  10.902   3.271

(colSums(M)/133) * (rowSums(M)/133)[3] * 133

 Palmas Árboles  Postes 
 13.805  10.150   3.045


sum(colSums(M))

[1] 133

sum(rowSums(M))

[1] 133