Bibliotecas

library(readxl)
library(tidyverse)
library(janitor)
library(emmeans)
library(effectsize)

Datos ratones

# Read the mouse experiment workbook into a tibble.
ratones <- read_excel(path = "experimento_ratones.xlsx")
# Auto-print the tibble to inspect its columns and rows.
ratones
## # A tibble: 16 x 4
##    Block Treatment Strain    GST
##    <chr> <chr>     <chr>   <dbl>
##  1 A     Control   NIH       444
##  2 A     Treated   NIH       614
##  3 A     Control   BALB/C    423
##  4 A     Treated   BALB/C    625
##  5 A     Control   A/J       408
##  6 A     Treated   A/J       856
##  7 A     Control   129/Ola   447
##  8 A     Treated   129/Ola   719
##  9 B     Control   NIH       764
## 10 B     Treated   NIH       831
## 11 B     Control   BALB/C    586
## 12 B     Treated   BALB/C    782
## 13 B     Control   A/J       609
## 14 B     Treated   A/J      1002
## 15 B     Control   129/Ola   606
## 16 B     Treated   129/Ola   766

Anova

Hipótesis

\[H_0: \mu_1 = \mu_2 = \mu_3 = \mu_4 \\ H_1: \mu_i \neq \mu_j \text{ para al menos un par } i \neq j\]

Nivel de significancia

  • Vamos a utilizar un nivel de significancia de 5% (\(\alpha = 0.05\))

Ajuste del modelo

# One-way ANOVA of GST with Strain as the only explanatory factor.
# NOTE(review): the data also has Block and Treatment columns that are not
# part of this model -- confirm that the one-way analysis is intended.
modelo <- aov(formula = GST ~ Strain, data = ratones)
# Print the ANOVA table (Df, sums of squares, F value and p-value).
summary(modelo)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Strain       3  28613    9538   0.271  0.845
## Residuals   12 422077   35173

Estimación de medias

# Estimated marginal mean of GST for each level of Strain, with its
# standard error and confidence limits.
emmeans(object = modelo, specs = "Strain")
##  Strain  emmean   SE df lower.CL upper.CL
##  129/Ola    634 93.8 12      430      839
##  A/J        719 93.8 12      514      923
##  BALB/C     604 93.8 12      400      808
##  NIH        663 93.8 12      459      868
## 
## Confidence level used: 0.95

Comparación de medias

# Tukey honest significant differences for every pair of Strain levels;
# the 95% family-wise confidence level (the default) is spelled out.
TukeyHSD(modelo, which = "Strain", conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = GST ~ Strain, data = ratones)
## 
## $Strain
##                   diff       lwr      upr     p adj
## A/J-129/Ola      84.25 -309.4684 477.9684 0.9185996
## BALB/C-129/Ola  -30.50 -424.2184 363.2184 0.9954768
## NIH-129/Ola      28.75 -364.9684 422.4684 0.9962012
## BALB/C-A/J     -114.75 -508.4684 278.9684 0.8223712
## NIH-A/J         -55.50 -449.2184 338.2184 0.9742353
## NIH-BALB/C       59.25 -334.4684 452.9684 0.9689979

Residuales

# Draw the diagnostic plots of the fitted ANOVA model on a 2 x 2 grid.
# par() returns the previous values of the parameters it changes, so they
# are saved first and restored afterwards; otherwise every later plot in
# the same session would inherit the 2 x 2 layout.
par_previo <- par(mfrow = c(2, 2))
plot(modelo)
par(par_previo)

Correlación

Correlación paramétrica

# Pearson (parametric) correlation between sepal length and petal length
# in the built-in iris data set; "pearson" is the default method.
cor(iris[["Sepal.Length"]], iris[["Petal.Length"]], method = "pearson")
## [1] 0.8717538

Gráfico de dispersión

# Scatter plot of petal length against sepal length with a fitted
# linear-model smoothing line layered on top of the points.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length)
) +
  geom_point() +
  geom_smooth(method = "lm")

Regresión

Modelo

\[y_i = \beta_0 + \beta_1 X_i + \varepsilon_i\]

Ajuste del modelo

# Simple linear regression: Petal.Length is the response and Sepal.Length
# is the single predictor, fitted on the built-in iris data set.
regresion <- lm(formula = Petal.Length ~ Sepal.Length, data = iris)
# Print coefficients, standard errors, R-squared and the overall F test.
summary(regresion)
## 
## Call:
## lm(formula = Petal.Length ~ Sepal.Length, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.47747 -0.59072 -0.00668  0.60484  2.49512 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.10144    0.50666  -14.02   <2e-16 ***
## Sepal.Length  1.85843    0.08586   21.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8678 on 148 degrees of freedom
## Multiple R-squared:   0.76,  Adjusted R-squared:  0.7583 
## F-statistic: 468.6 on 1 and 148 DF,  p-value: < 2.2e-16

Modelo final

\[\hat{y}_i = -7.10144 + 1.85843 X_i\]