Prueba T

knitr::opts_chunk$set(echo = TRUE)
### One Sample t-test
## Ho: mu >= 3kg
## Ha: mu < 3kg
t_mass <- round(rnorm(50,3,0.4), digits = 3)
t_mass

##  [1] 2.573 2.754 2.533 3.666 3.584 3.361 3.115 2.692 3.701 3.322 2.713 1.699
## [13] 3.525 3.458 2.932 3.251 2.948 3.009 2.900 3.408 3.058 2.521 2.836 2.160
## [25] 3.486 3.011 3.157 3.311 2.688 3.012 3.178 2.563 2.919 3.940 3.163 3.469
## [37] 2.938 3.261 3.601 3.072 3.220 3.064 3.836 3.016 2.932 3.210 2.677 2.719
## [49] 3.157 2.583

library(psych)
class(t_mass)

## [1] "numeric"

describe(t_mass)

##    vars  n mean   sd median trimmed  mad min  max range  skew kurtosis   se
## X1    1 50 3.06 0.42   3.06    3.07 0.42 1.7 3.94  2.24 -0.46     0.77 0.06

## La media muestral es 3.1kg
t.test(t_mass, alternative = "l", mu = 3, 
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  t_mass
## t = 0.96743, df = 49, p-value = 0.831
## alternative hypothesis: true mean is less than 3
## 95 percent confidence interval:
##      -Inf 3.158623
## sample estimates:
## mean of x 
##   3.05804

# Ya que el p valor > 5%, no rechazo Ho

### Prueba T independientes
# 100 Parcelas de 3x3m
# Se midio rendimiento (rto) RESPUESTA
# y se usaran dos formulas de fertilizacion (FACTOR 1)
# Con esta funcion se crea la grilla
xy = expand.grid(x = 1:10, y = 1:10)

rto = rnorm(100,3,0.3)
plot(xy, cex = rto/2, pch = 15, col="lightgreen")

################ COMO USAR which()
which(xy$x<=5) # indice del df donde la condicion verdad

##  [1]  1  2  3  4  5 11 12 13 14 15 21 22 23 24 25 31 32 33 34 35 41 42 43 44 45
## [26] 51 52 53 54 55 61 62 63 64 65 71 72 73 74 75 81 82 83 84 85 91 92 93 94 95

rto[which(xy$x<=5)] # extrae rediminto segun el indice

##  [1] 2.682569 3.186991 3.053848 3.365138 3.293858 3.419667 3.037716 3.120085
##  [9] 3.109296 2.305620 3.080439 2.977005 3.096961 3.105185 3.156673 3.226556
## [17] 1.989058 3.363759 2.630768 2.745072 2.786581 2.969365 3.509884 2.965715
## [25] 2.420841 2.484184 2.965598 3.204536 2.899330 2.816966 2.539629 2.585368
## [33] 2.913458 2.827941 2.964206 2.412713 3.028763 2.809008 3.164440 3.144602
## [41] 2.656094 3.633043 3.086170 3.210039 2.454141 2.592038 3.251205 3.168448
## [49] 2.989848 4.216495

which(xy$x>5 & xy$y>5) # indice que cumple las 2 condiciones (&)

##  [1]  56  57  58  59  60  66  67  68  69  70  76  77  78  79  80  86  87  88  89
## [20]  90  96  97  98  99 100

rto[which(xy$x>5 & xy$y>5)] # valores rto de los indices respectivos

##  [1] 3.418505 3.301966 3.194797 2.988391 2.844520 2.636521 2.541877 3.509907
##  [9] 3.705362 3.044060 3.064158 3.143386 2.925500 2.580276 2.860425 2.889014
## [17] 3.017834 3.010051 3.434500 2.770627 2.821737 2.873892 2.373229 3.225994
## [25] 3.518569

##############################################

rto[which(xy$x<=5 & xy$y<=5)] = sort(rto[which(xy$x<=5 & xy$y<=5)])
rto[which(xy$x>5 & xy$y>5)] = sort(rto[which(xy$x>5 & xy$y>5)],
                                   decreasing = T) 
# ordenar de manera decreciente -> decreasing = T

# generando 2 niveles del factor fertilizacion > 2 tratamientos

fert = gl(n = 2, k = 50, length = 100,
          labels = c("15:15:15", "15:15:15:2:5"))
fert

##   [1] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##   [6] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [11] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [16] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [21] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [26] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [31] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [36] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [41] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [46] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [51] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [56] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [61] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [66] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [71] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [76] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [81] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [86] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [91] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [96] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
## Levels: 15:15:15 15:15:15:2:5

# aleatorizacion de los fertilizantes sobre las parcelas
# funcion sample() de fert
fert = sample(x = fert, size = length(fert))
fert

##   [1] 15:15:15     15:15:15:2:5 15:15:15     15:15:15     15:15:15    
##   [6] 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15:2:5 15:15:15:2:5
##  [11] 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15     15:15:15    
##  [16] 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15     15:15:15:2:5
##  [21] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15    
##  [26] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15    
##  [31] 15:15:15     15:15:15:2:5 15:15:15     15:15:15     15:15:15:2:5
##  [36] 15:15:15     15:15:15:2:5 15:15:15     15:15:15     15:15:15    
##  [41] 15:15:15     15:15:15     15:15:15:2:5 15:15:15:2:5 15:15:15    
##  [46] 15:15:15:2:5 15:15:15     15:15:15     15:15:15     15:15:15    
##  [51] 15:15:15     15:15:15     15:15:15     15:15:15     15:15:15:2:5
##  [56] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5
##  [61] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15    
##  [66] 15:15:15:2:5 15:15:15     15:15:15:2:5 15:15:15:2:5 15:15:15    
##  [71] 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15:2:5 15:15:15    
##  [76] 15:15:15     15:15:15     15:15:15:2:5 15:15:15:2:5 15:15:15    
##  [81] 15:15:15     15:15:15:2:5 15:15:15     15:15:15     15:15:15:2:5
##  [86] 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15     15:15:15:2:5
##  [91] 15:15:15:2:5 15:15:15:2:5 15:15:15:2:5 15:15:15     15:15:15:2:5
##  [96] 15:15:15:2:5 15:15:15     15:15:15     15:15:15:2:5 15:15:15:2:5
## Levels: 15:15:15 15:15:15:2:5

table(fert)

## fert
##     15:15:15 15:15:15:2:5 
##           50           50

# cambiando color segun el tipo de fertilizante
# funcion ifelse > si es igual a 'darkgreen' de lo contrario 'lightgreen'
color = ifelse(fert == "15:15:15", "darkgreen", "lightgreen")

# generando grafico
plot(xy, cex = rto/3, pch = 15, col = color, 
     main = 'Distribucion de rendimiento en 100 parcelas experimentales')

datos = data.frame(rend = rto, frt = fert)
table(datos$frt)

## 
##     15:15:15 15:15:15:2:5 
##           50           50

### Estadisticas descriptivas
# medias por tratamiento
(medias = tapply(datos$rend,datos$frt , mean))

##     15:15:15 15:15:15:2:5 
##     2.983093     3.051231

# varianza
(var = tapply(datos$rend,datos$frt , var))

##     15:15:15 15:15:15:2:5 
##   0.09461377   0.16235050

# desv
(desv = tapply(datos$rend,datos$frt , sd))

##     15:15:15 15:15:15:2:5 
##    0.3075935    0.4029274

# coeficiente de variacion
(cov = 100*desv/medias)

##     15:15:15 15:15:15:2:5 
##     10.31123     13.20540

# graficos
library(lattice)
bwplot(datos$rend~datos$frt,ylab="Rendimiento")

# histograma multiple
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

ggplot(datos, aes(x = rend, fill = frt))+
  geom_density(alpha = 0.6)

### pruebas de hipotesis
# comparar las medias del rendimiento de los fert. usados

# prueba t-student para 2 muestras independientes
# las parcelas en las que utilizo los fert no estan relacionadas

# datos de rendimientol para fert 1
datos$rend[datos$frt == '15:15:15']

##  [1] 1.989058 2.420841 2.630768 2.682569 2.942986 2.965715 2.969365 2.977005
##  [9] 3.347918 3.308371 3.037716 3.053848 3.080439 3.096961 3.105185 2.913369
## [17] 3.109296 3.156673 3.186991 3.303651 2.541959 3.302791 2.888905 3.293858
## [25] 3.363759 3.509884 2.393303 2.962529 3.520904 3.485114 2.484184 2.965598
## [33] 3.204536 2.899330 2.964206 3.225994 3.064158 2.809008 3.144602 3.044060
## [41] 3.017834 2.925500 2.656094 3.086170 3.210039 2.860425 2.844520 2.989848
## [49] 2.636521 2.580276

################################################ Inferencia
# Ho: mu(rend)trip 15 == mu(rend)trip 15 2:5
# Ha: mu(rend)trip 15 < mu(rend)trip 15 2:5
# mu = 0 , significa que las medias son iguales
# alternative (t, l, g) > segun Ha
# t: Ha no igual (!=)
# g: Ha mayor que >
# l: Ha menor que <

# corriendo prueba t 
# Asumiendo que las varianzas son igual 
prb_1 = t.test(x = datos$rend[datos$frt == '15:15:15'],
               y = datos$rend[datos$frt == '15:15:15:2:5'],
               mu = 0, alternative = 'l', conf.level = 0.95)
# t.test(x = , y = ,alternative = , mu = , conf.level = )
# extraer p valor de la prueba
prb_1

## 
##  Welch Two Sample t-test
## 
## data:  datos$rend[datos$frt == "15:15:15"] and datos$rend[datos$frt == "15:15:15:2:5"]
## t = -0.95048, df = 91.633, p-value = 0.1722
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf 0.0509835
## sample estimates:
## mean of x mean of y 
##  2.983093  3.051231

prb_1$p.value

## [1] 0.1721856

ifelse(prb_1$p.value<0.05,
       'Rechazo Ho: Las medias no son iguales',
       'No rechazo Ho: medias iguales')

## [1] "No rechazo Ho: medias iguales"

# tiene sentido el resultado anterior?
# La prueba t tiene dos versiones 
# cuando las varianzas son iguales y cuando
# las varianzas son distintas

# verificando varianzas, prueba f
# H0: la varianza de los datos de fert1 == varianza fert 2
# Ha: la varianza de los datos de fert1 != varianza fert 2
# ratio = 1, cociente de varianzas == 1 (iguales)
# F test to compare two variances
prb_1v = var.test(x = datos$rend[datos$frt == '15:15:15'],
                  y = datos$rend[datos$frt == '15:15:15:2:5'], 
                  ratio = 1, alternative =  't', conf.level = 0.95)

ifelse(prb_1v$p.value < 0.05,
       'Rechazo Ho: var desiguales',
       'No rechazo Ho: var iguales')

## [1] "No rechazo Ho: var iguales"

# P = 0.2512
# Quedamos insatisfechos con el umbral del P = 0.05
# prueba anterior se puede realizar la prueba asumiendo varianzas iguales
# varianza iguales en t-test: var.equal =T

## Si las varianzas hubieran sido desiguales
#prb_1b = t.test(x = datos$rend[datos$frt == '15:15:15'],
#             y = datos$rend[datos$frt == '15:15:15:2:5'],
#             mu = 0, alternative = 'l',conf.level = 0.95,
#             var.equal = F)

# conclusion: cualquier fertilizante puede ser utilizado, 
# o el de menor impacto ambiental, o menor costo, o producto nacional...

# no hay diferencia entre los rendimientos...


######### prueba T-student para dos muestras relacionadas o pareadas
# Con esta funcion se crea la grilla
xy10 = expand.grid(x = 1:10,
                   y = 1:10)

xy20 = expand.grid(x = 1:10,
                   y = 1:10)

# Generacion de datos 
MO_10 = sort(rnorm(100,3,0.45),decreasing = T)
MO_20 = sort(rnorm(100,2.7,0.35),decreasing = T)

# explorando la correlacion
cor(MO_10, MO_20)

## [1] 0.9836192

plot(MO_10,MO_20, xlim =c(0, 4), ylim =c(0,4))
curve(1*x, add = T, from = 0, to = 4)
text(3,1,'MO_10 > MO_20')
text(1,3,'MO_10 < MO_20')

# datos correlacionados
# muestras relacionas
# muestras en la misma coordenada
# prueba T-student para muestras correlacionadas

# Ho: mu(MO_10) == mu(MO_20)
# Ha: mu(MO_10) != mu(MO_20)

prb_2 = t.test(MO_10, MO_20,paired = T, mu = 0, conf.level = 0.95,
               alternative = 't')

ifelse(prb_2$p.value<0.05,
       'Rechazo Ho: ambas capas tiene diferente MO',
       'No rechazo Ho: la MO de las capas es igual')

## [1] "Rechazo Ho: ambas capas tiene diferente MO"

boxplot(MO_10, MO_20)

#par(mfrow=(c(1,1)))

plot(xy10, cex = MO_10/2, pch = 15, 
     col = gray.colors(n = length(MO_10),start=0,end=1),
     main = 'Contenido MO capa A')

plot(xy20, cex = MO_20/2, pch = 15, 
     col = gray.colors(n = length(MO_20),start=0,end=1),
     main = 'Contenido MO capa B')

# prueba de correlacion

prb_3 = cor.test(MO_10, MO_20,conf.level = 0.95,alternative = 't')
ifelse(prb_3$p.value<0.05,'Correlacion NO nula',
       'Correlacion nula')

## [1] "Correlacion NO nula"

# >> Hay correlacion