knitr::opts_chunk$set(echo = TRUE)
### One Sample t-test
## Ho: mu >= 3kg
## Ha: mu < 3kg
t_mass <- round(rnorm(50,3,0.4), digits = 3)
t_mass
## [1] 2.573 2.754 2.533 3.666 3.584 3.361 3.115 2.692 3.701 3.322 2.713 1.699
## [13] 3.525 3.458 2.932 3.251 2.948 3.009 2.900 3.408 3.058 2.521 2.836 2.160
## [25] 3.486 3.011 3.157 3.311 2.688 3.012 3.178 2.563 2.919 3.940 3.163 3.469
## [37] 2.938 3.261 3.601 3.072 3.220 3.064 3.836 3.016 2.932 3.210 2.677 2.719
## [49] 3.157 2.583
library(psych)
class(t_mass)
## [1] "numeric"
describe(t_mass)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 50 3.06 0.42 3.06 3.07 0.42 1.7 3.94 2.24 -0.46 0.77 0.06
## La media muestral es 3.1kg
t.test(t_mass, alternative = "l", mu = 3,
conf.level = 0.95)
##
## One Sample t-test
##
## data: t_mass
## t = 0.96743, df = 49, p-value = 0.831
## alternative hypothesis: true mean is less than 3
## 95 percent confidence interval:
## -Inf 3.158623
## sample estimates:
## mean of x
## 3.05804
# Ya que el p valor > 5%, no rechazo Ho
### Prueba T independientes
# 100 Parcelas de 3x3m
# Se midio rendimiento (rto) RESPUESTA
# y se usaran dos formulas de fertilizacion (FACTOR 1)
# Con esta funcion se crea la grilla
xy = expand.grid(x = 1:10, y = 1:10)
rto = rnorm(100,3,0.3)
plot(xy, cex = rto/2, pch = 15, col="lightgreen")

################ COMO USAR which()
which(xy$x<=5) # indice del df donde la condicion verdad
## [1] 1 2 3 4 5 11 12 13 14 15 21 22 23 24 25 31 32 33 34 35 41 42 43 44 45
## [26] 51 52 53 54 55 61 62 63 64 65 71 72 73 74 75 81 82 83 84 85 91 92 93 94 95
rto[which(xy$x<=5)] # extrae rediminto segun el indice
## [1] 2.682569 3.186991 3.053848 3.365138 3.293858 3.419667 3.037716 3.120085
## [9] 3.109296 2.305620 3.080439 2.977005 3.096961 3.105185 3.156673 3.226556
## [17] 1.989058 3.363759 2.630768 2.745072 2.786581 2.969365 3.509884 2.965715
## [25] 2.420841 2.484184 2.965598 3.204536 2.899330 2.816966 2.539629 2.585368
## [33] 2.913458 2.827941 2.964206 2.412713 3.028763 2.809008 3.164440 3.144602
## [41] 2.656094 3.633043 3.086170 3.210039 2.454141 2.592038 3.251205 3.168448
## [49] 2.989848 4.216495
which(xy$x>5 & xy$y>5) # indice que cumple las 2 condiciones (&)
## [1] 56 57 58 59 60 66 67 68 69 70 76 77 78 79 80 86 87 88 89
## [20] 90 96 97 98 99 100
rto[which(xy$x>5 & xy$y>5)] # valores rto de los indices respectivos
## [1] 3.418505 3.301966 3.194797 2.988391 2.844520 2.636521 2.541877 3.509907
## [9] 3.705362 3.044060 3.064158 3.143386 2.925500 2.580276 2.860425 2.889014
## [17] 3.017834 3.010051 3.434500 2.770627 2.821737 2.873892 2.373229 3.225994
## [25] 3.518569
##############################################
rto[which(xy$x<=5 & xy$y<=5)] = sort(rto[which(xy$x<=5 & xy$y<=5)])
rto[which(xy$x>5 & xy$y>5)] = sort(rto[which(xy$x>5 & xy$y>5)],
decreasing = T)
# ordenar de manera decreciente -> decreasing = T
# generando 2 niveles del factor fertilizacion > 2 tratamientos
fert = gl(n = 2, k = 50, length = 100,
labels = c("15:15:15", "15:15:15:2:5"))
fert
## [1] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [6] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [11] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [16] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [21] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [26] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [31] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [36] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [41] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [46] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [51] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [56] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [61] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [66] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [71] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [76] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [81] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [86] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [91] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [96] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## Levels: 15:15:15 15:15:15:2:5
# aleatorizacion de los fertilizantes sobre las parcelas
# funcion sample() de fert
fert = sample(x = fert, size = length(fert))
fert
## [1] 15:15:15 15:15:15:2:5 15:15:15 15:15:15 15:15:15
## [6] 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15:2:5 15:15:15:2:5
## [11] 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15 15:15:15
## [16] 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15 15:15:15:2:5
## [21] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15
## [26] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15
## [31] 15:15:15 15:15:15:2:5 15:15:15 15:15:15 15:15:15:2:5
## [36] 15:15:15 15:15:15:2:5 15:15:15 15:15:15 15:15:15
## [41] 15:15:15 15:15:15 15:15:15:2:5 15:15:15:2:5 15:15:15
## [46] 15:15:15:2:5 15:15:15 15:15:15 15:15:15 15:15:15
## [51] 15:15:15 15:15:15 15:15:15 15:15:15 15:15:15:2:5
## [56] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## [61] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15
## [66] 15:15:15:2:5 15:15:15 15:15:15:2:5 15:15:15:2:5 15:15:15
## [71] 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15:2:5 15:15:15
## [76] 15:15:15 15:15:15 15:15:15:2:5 15:15:15:2:5 15:15:15
## [81] 15:15:15 15:15:15:2:5 15:15:15 15:15:15 15:15:15:2:5
## [86] 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15 15:15:15:2:5
## [91] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15 15:15:15:2:5
## [96] 15:15:15:2:5 15:15:15 15:15:15 15:15:15:2:5 15:15:15:2:5
## Levels: 15:15:15 15:15:15:2:5
table(fert)
## fert
## 15:15:15 15:15:15:2:5
## 50 50
# cambiando color segun el tipo de fertilizante
# funcion ifelse > si es igual a 'darkgreen' de lo contrario 'lightgreen'
color = ifelse(fert == "15:15:15", "darkgreen", "lightgreen")
# generando grafico
plot(xy, cex = rto/3, pch = 15, col = color,
main = 'Distribucion de rendimiento en 100 parcelas experimentales')

datos = data.frame(rend = rto, frt = fert)
table(datos$frt)
##
## 15:15:15 15:15:15:2:5
## 50 50
### Estadisticas descriptivas
# medias por tratamiento
(medias = tapply(datos$rend,datos$frt , mean))
## 15:15:15 15:15:15:2:5
## 2.983093 3.051231
# varianza
(var = tapply(datos$rend,datos$frt , var))
## 15:15:15 15:15:15:2:5
## 0.09461377 0.16235050
# desv
(desv = tapply(datos$rend,datos$frt , sd))
## 15:15:15 15:15:15:2:5
## 0.3075935 0.4029274
# coeficiente de variacion
(cov = 100*desv/medias)
## 15:15:15 15:15:15:2:5
## 10.31123 13.20540
# graficos
library(lattice)
bwplot(datos$rend~datos$frt,ylab="Rendimiento")

# histograma multiple
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
ggplot(datos, aes(x = rend, fill = frt))+
geom_density(alpha = 0.6)

### pruebas de hipotesis
# comparar las medias del rendimiento de los fert. usados
# prueba t-student para 2 muestras independientes
# las parcelas en las que utilizo los fert no estan relacionadas
# datos de rendimientol para fert 1
datos$rend[datos$frt == '15:15:15']
## [1] 1.989058 2.420841 2.630768 2.682569 2.942986 2.965715 2.969365 2.977005
## [9] 3.347918 3.308371 3.037716 3.053848 3.080439 3.096961 3.105185 2.913369
## [17] 3.109296 3.156673 3.186991 3.303651 2.541959 3.302791 2.888905 3.293858
## [25] 3.363759 3.509884 2.393303 2.962529 3.520904 3.485114 2.484184 2.965598
## [33] 3.204536 2.899330 2.964206 3.225994 3.064158 2.809008 3.144602 3.044060
## [41] 3.017834 2.925500 2.656094 3.086170 3.210039 2.860425 2.844520 2.989848
## [49] 2.636521 2.580276
################################################ Inferencia
# Ho: mu(rend)trip 15 == mu(rend)trip 15 2:5
# Ha: mu(rend)trip 15 < mu(rend)trip 15 2:5
# mu = 0 , significa que las medias son iguales
# alternative (t, l, g) > segun Ha
# t: Ha no igual (!=)
# g: Ha mayor que >
# l: Ha menor que <
# corriendo prueba t
# Asumiendo que las varianzas son igual
prb_1 = t.test(x = datos$rend[datos$frt == '15:15:15'],
y = datos$rend[datos$frt == '15:15:15:2:5'],
mu = 0, alternative = 'l', conf.level = 0.95)
# t.test(x = , y = ,alternative = , mu = , conf.level = )
# extraer p valor de la prueba
prb_1
##
## Welch Two Sample t-test
##
## data: datos$rend[datos$frt == "15:15:15"] and datos$rend[datos$frt == "15:15:15:2:5"]
## t = -0.95048, df = 91.633, p-value = 0.1722
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf 0.0509835
## sample estimates:
## mean of x mean of y
## 2.983093 3.051231
prb_1$p.value
## [1] 0.1721856
ifelse(prb_1$p.value<0.05,
'Rechazo Ho: Las medias no son iguales',
'No rechazo Ho: medias iguales')
## [1] "No rechazo Ho: medias iguales"
# tiene sentido el resultado anterior?
# La prueba t tiene dos versiones
# cuando las varianzas son iguales y cuando
# las varianzas son distintas
# verificando varianzas, prueba f
# H0: la varianza de los datos de fert1 == varianza fert 2
# Ha: la varianza de los datos de fert1 != varianza fert 2
# ratio = 1, cociente de varianzas == 1 (iguales)
# F test to compare two variances
prb_1v = var.test(x = datos$rend[datos$frt == '15:15:15'],
y = datos$rend[datos$frt == '15:15:15:2:5'],
ratio = 1, alternative = 't', conf.level = 0.95)
ifelse(prb_1v$p.value < 0.05,
'Rechazo Ho: var desiguales',
'No rechazo Ho: var iguales')
## [1] "No rechazo Ho: var iguales"
# P = 0.2512
# Quedamos insatisfechos con el umbral del P = 0.05
# prueba anterior se puede realizar la prueba asumiendo varianzas iguales
# varianza iguales en t-test: var.equal =T
## Si las varianzas hubieran sido desiguales
#prb_1b = t.test(x = datos$rend[datos$frt == '15:15:15'],
# y = datos$rend[datos$frt == '15:15:15:2:5'],
# mu = 0, alternative = 'l',conf.level = 0.95,
# var.equal = F)
# conclusion: cualquier fertilizante puede ser utilizado,
# o el de menor impacto ambiental, o menor costo, o producto nacional...
# no hay diferencia entre los rendimientos...
######### prueba T-student para dos muestras relacionadas o pareadas
# Con esta funcion se crea la grilla
xy10 = expand.grid(x = 1:10,
y = 1:10)
xy20 = expand.grid(x = 1:10,
y = 1:10)
# Generacion de datos
MO_10 = sort(rnorm(100,3,0.45),decreasing = T)
MO_20 = sort(rnorm(100,2.7,0.35),decreasing = T)
# explorando la correlacion
cor(MO_10, MO_20)
## [1] 0.9836192
plot(MO_10,MO_20, xlim =c(0, 4), ylim =c(0,4))
curve(1*x, add = T, from = 0, to = 4)
text(3,1,'MO_10 > MO_20')
text(1,3,'MO_10 < MO_20')

# datos correlacionados
# muestras relacionas
# muestras en la misma coordenada
# prueba T-student para muestras correlacionadas
# Ho: mu(MO_10) == mu(MO_20)
# Ha: mu(MO_10) != mu(MO_20)
prb_2 = t.test(MO_10, MO_20,paired = T, mu = 0, conf.level = 0.95,
alternative = 't')
ifelse(prb_2$p.value<0.05,
'Rechazo Ho: ambas capas tiene diferente MO',
'No rechazo Ho: la MO de las capas es igual')
## [1] "Rechazo Ho: ambas capas tiene diferente MO"
boxplot(MO_10, MO_20)

#par(mfrow=(c(1,1)))
plot(xy10, cex = MO_10/2, pch = 15,
col = gray.colors(n = length(MO_10),start=0,end=1),
main = 'Contenido MO capa A')

plot(xy20, cex = MO_20/2, pch = 15,
col = gray.colors(n = length(MO_20),start=0,end=1),
main = 'Contenido MO capa B')

# prueba de correlacion
prb_3 = cor.test(MO_10, MO_20,conf.level = 0.95,alternative = 't')
ifelse(prb_3$p.value<0.05,'Correlacion NO nula',
'Correlacion nula')
## [1] "Correlacion NO nula"
# >> Hay correlacion