Intro

Ejercicio 1

# Lectura de datos
ejer01 <- read_csv("https://goo.gl/3L4EtK", col_types = "cd")
str(ejer01)
## tibble [31 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Group    : chr [1:31] "H-O" "H-O" "H-O" "H-O" ...
##  $ phosphate: num [1:31] 2.3 4.1 4.2 4 4.6 4.6 3.8 5.2 3.1 3.7 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Group = col_character(),
##   ..   phosphate = col_double()
##   .. )

Análisis preliminar

ggplot(ejer01, aes(x = Group, y = phosphate)) + geom_boxplot()

Selección del modelo

fit.M1 <- lm(phosphate ~ Group, data = ejer01)
ols_step_backward_p(fit.M1, prem = 0.05)
## [1] "No variables have been removed from the model."

Estimación

# Estimación del modelo ajustado
tab_model(fit.M1, 
          show.r2 = FALSE, 
          show.p = FALSE)
  phosphate
Predictors Estimates CI
(Intercept) 2.78 2.44 – 3.13
Group [H-O] 1.16 0.67 – 1.66
Group [N-O] 0.65 0.11 – 1.20
Observations 31
# Comparaciones múltiples
TukeyHSD(fit.M1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = x)
## 
## $Group
##               diff           lwr       upr     p adj
## H-O-C    1.1621212  0.5642291555 1.7600133 0.0001347
## N-O-C    0.6541667  0.0003963287 1.3079370 0.0498380
## N-O-H-O -0.5079545 -1.1735054769 0.1575964 0.1607135
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)

Diagnóstico

# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = Group, y = .stdresid)) + 
   geom_boxplot() +
   geom_hline(yintercept = 0, col = "red")

# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.9691         0.4942 
## Kolmogorov-Smirnov        0.0858         0.9764 
## Cramer-von Mises          3.2934         0.0000 
## Anderson-Darling          0.2831         0.6106 
## -----------------------------------------------
leveneTest(.stdresid ~ Group, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.0069 0.3782
##       28
ols_plot_cooksd_chart(fit.M1)

Predicción

# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = "Group",
           show.stat = TRUE, 
           title ="")
# Tabla de estimaciones
p$data
## 
## # Predicted values of phosphate
## # x = Group
## 
## x | Predicted |   SE | group_col |       95% CI
## -----------------------------------------------
## 1 |      3.95 | 0.17 |         1 | [3.60, 4.29]
## 2 |      3.44 | 0.20 |         1 | [3.04, 3.84]
## 3 |      2.78 | 0.17 |         1 | [2.46, 3.11]

Ejercicio 2

# Lectura de datos
ejer02 <- read_csv("https://goo.gl/J2ZKWK", col_types = "dcc")
str(ejer02)
## tibble [100 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Butterfat: num [1:100] 3.74 4.01 3.77 3.78 4.1 4.06 4.27 3.94 4.11 4.25 ...
##  $ Bread    : chr [1:100] "Ayrshire" "Ayrshire" "Ayrshire" "Ayrshire" ...
##  $ Age      : chr [1:100] "Mature" "2year" "Mature" "2year" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Butterfat = col_double(),
##   ..   Bread = col_character(),
##   ..   Age = col_character()
##   .. )

Análisis preliminar

ggplot(ejer02,aes(x = Age, y = Butterfat, color = Bread)) + 
  geom_boxplot() 

Selección del modelo

fit.M1 <- lm(Butterfat ~ Age*Bread, data = ejer02)
ols_step_backward_p(fit.M1, prem = 0.05)
## 
## 
##                            Elimination Summary                             
## --------------------------------------------------------------------------
##         Variable                   Adj.                                       
## Step     Removed     R-Square    R-Square     C(p)        AIC        RMSE     
## --------------------------------------------------------------------------
##    1    Age:Bread      0.6825      0.6656    -1.0316    115.1153    0.4138    
##    2    Age            0.6771      0.6635    -1.4515    114.8006    0.4151    
## --------------------------------------------------------------------------
fit.M1 <- lm(Butterfat ~ Bread, data = ejer02)

Estimación

# Estimación del modelo ajustado
tab_model(fit.M1, 
          show.r2 = FALSE, 
          show.p = FALSE)
  Butterfat
Predictors Estimates CI
(Intercept) 4.06 3.88 – 4.24
Bread [Canadian] 0.38 0.12 – 0.64
Bread [Guernsey] 0.89 0.63 – 1.15
Bread [Holstein-Fresian] -0.39 -0.65 – -0.13
Bread [Jersey] 1.23 0.97 – 1.49
Observations 100
# Comparaciones múltiples
TukeyHSD(fit.M1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = x)
## 
## $Bread
##                              diff         lwr         upr     p adj
## Canadian-Ayrshire          0.3785  0.01348598  0.74351402 0.0381804
## Guernsey-Ayrshire          0.8900  0.52498598  1.25501402 0.0000000
## Holstein-Fresian-Ayrshire -0.3905 -0.75551402 -0.02548598 0.0297910
## Jersey-Ayrshire            1.2325  0.86748598  1.59751402 0.0000000
## Guernsey-Canadian          0.5115  0.14648598  0.87651402 0.0016669
## Holstein-Fresian-Canadian -0.7690 -1.13401402 -0.40398598 0.0000007
## Jersey-Canadian            0.8540  0.48898598  1.21901402 0.0000000
## Holstein-Fresian-Guernsey -1.2805 -1.64551402 -0.91548598 0.0000000
## Jersey-Guernsey            0.3425 -0.02251402  0.70751402 0.0767002
## Jersey-Holstein-Fresian    1.6230  1.25798598  1.98801402 0.0000000
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)

Diagnóstico

# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = Bread, y = .stdresid)) + 
   geom_boxplot() +
   geom_hline(yintercept = 0, col = "red")

# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.968          0.0157 
## Kolmogorov-Smirnov        0.1011         0.2588 
## Cramer-von Mises         15.8037         0.0000 
## Anderson-Darling          0.986          0.0128 
## -----------------------------------------------
leveneTest(.stdresid ~ Bread, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  4  3.1914 0.01661 *
##       95                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ols_plot_cooksd_chart(fit.M1)

Box-Cox

MASS::boxcox(fit.M1) 

# Creamos la variable transformada
ejer02 <- ejer02 %>% mutate(Butterfatinv = 1/Butterfat)
# Ajustamos el modelo de nuevo
fit.M1.inv <- lm(Butterfatinv ~ Age*Bread, data = ejer02)
# Selección utilizando el test F
ols_step_backward_p(fit.M1.inv, prem = 0.05)
## 
## 
##                             Elimination Summary                             
## ---------------------------------------------------------------------------
##         Variable                   Adj.                                        
## Step     Removed     R-Square    R-Square     C(p)         AIC        RMSE     
## ---------------------------------------------------------------------------
##    1    Age:Bread      0.7283      0.7138    -1.1215    -503.6001    0.0188    
##    2    Age            0.7225      0.7109    -1.1618    -503.5122    0.0189    
## ---------------------------------------------------------------------------
fit.M1.inv <- lm(Butterfatinv ~ Bread, data = ejer02)
# Estimación del modelo ajustado
tab_model(fit.M1.inv, 
          show.r2 = FALSE, 
          show.p = FALSE)
  Butterfatinv
Predictors Estimates CI
(Intercept) 0.25 0.24 – 0.26
Bread [Canadian] -0.02 -0.03 – -0.01
Bread [Guernsey] -0.04 -0.06 – -0.03
Bread [Holstein-Fresian] 0.03 0.01 – 0.04
Bread [Jersey] -0.06 -0.07 – -0.04
Observations 100
# Comparaciones múltiples
TukeyHSD(fit.M1.inv)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = x)
## 
## $Bread
##                                  diff          lwr          upr     p adj
## Canadian-Ayrshire         -0.02059248 -0.037175383 -0.004009586 0.0072424
## Guernsey-Ayrshire         -0.04341678 -0.059999681 -0.026833884 0.0000000
## Holstein-Fresian-Ayrshire  0.02642525  0.009842347  0.043008144 0.0002392
## Jersey-Ayrshire           -0.05609240 -0.072675300 -0.039509503 0.0000000
## Guernsey-Canadian         -0.02282430 -0.039407196 -0.006241399 0.0021164
## Holstein-Fresian-Canadian  0.04701773  0.030434832  0.063600629 0.0000000
## Jersey-Canadian           -0.03549992 -0.052082816 -0.018917018 0.0000004
## Holstein-Fresian-Guernsey  0.06984203  0.053259129  0.086424927 0.0000000
## Jersey-Guernsey           -0.01267562 -0.029258518  0.003907279 0.2179051
## Jersey-Holstein-Fresian   -0.08251765 -0.099100546 -0.065934749 0.0000000
plot(TukeyHSD(fit.M1.inv), las=1, tcl = -.3)

Diagnóstico

# Valores de diagnóstico
diagnostico <- fortify(fit.M1.inv)
# Gráfico
ggplot(diagnostico,aes(x = Bread, y = .stdresid)) + 
   geom_boxplot() +
   geom_hline(yintercept = 0, col = "red")

# Tests de hipótesis
ols_test_normality(fit.M1.inv)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.9922         0.8364 
## Kolmogorov-Smirnov        0.0605         0.8581 
## Cramer-von Mises         32.1197         0.0000 
## Anderson-Darling          0.2898         0.6062 
## -----------------------------------------------
leveneTest(.stdresid ~ Bread, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.2311 0.9203
##       95
ols_plot_cooksd_chart(fit.M1.inv)

# Objeto gráfico
p <- plot_model(fit.M1.inv, "pred", terms = "Bread",
           show.stat = TRUE, 
           title ="")
# Tabla de estimaciones
p$data
## 
## # Predicted values of Butterfatinv
## # x = Bread
## 
## x | Predicted | SE | group_col |       95% CI
## ---------------------------------------------
## 1 |      0.25 |  0 |         1 | [0.24, 0.26]
## 2 |      0.23 |  0 |         1 | [0.22, 0.23]
## 3 |      0.20 |  0 |         1 | [0.20, 0.21]
## 4 |      0.27 |  0 |         1 | [0.27, 0.28]
## 5 |      0.19 |  0 |         1 | [0.18, 0.20]

Ejercicio 4

# Lectura de datos
ejer04 <- read_csv("https://bit.ly/2GhFsl7", col_types = "dccc")
ejer04 <- ejer04 %>% 
   mutate_if(sapply(ejer04, is.character), as.factor)
str(ejer04)
## tibble [72 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Rendimiento  : num [1:72] 72.2 74.4 64.3 62.5 65.8 71.2 69 70.3 68.8 65 ...
##  $ Concentracion: Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Catalizador  : Factor w/ 3 levels "C1","C2","C3": 1 1 1 2 2 2 3 3 3 1 ...
##  $ Temperatura  : Factor w/ 2 levels "T1","T2": 1 1 1 1 1 1 1 1 1 2 ...

Análisis preliminar

ggplot(ejer04, aes(x = Concentracion, y = Rendimiento, color = Catalizador)) + 
  facet_wrap(vars(Temperatura)) +
  geom_boxplot() 

ejer04$Concentracion <- as.ordered(ejer04$Concentracion)
ggplot(ejer04, aes(x = Concentracion, y = Rendimiento, color = Catalizador)) + 
  facet_wrap(vars(Temperatura)) +
  geom_boxplot() 

Selección del modelo

fit.M1 <- lm(Rendimiento ~ Concentracion*Catalizador*Temperatura, data = ejer04)
ols_step_backward_p(fit.M1, prem = 0.05)
## Note: model has aliased coefficients
##       sums of squares computed by model comparison
## 
## 
##                                           Elimination Summary                                           
## -------------------------------------------------------------------------------------------------------
##         Variable                                               Adj.                                        
## Step                   Removed                   R-Square    R-Square      C(p)        AIC        RMSE     
## -------------------------------------------------------------------------------------------------------
##    1    Concentracion:Temperatura                  0.6931      0.5461    -10.0000    464.1742    5.2593    
##    2    Concentracion:Catalizador                  0.6931      0.5461    -12.0000    470.1742    5.2593    
##    3    Concentracion:Catalizador:Temperatura      0.5857      0.5331      2.8068    455.7893    5.3342    
## -------------------------------------------------------------------------------------------------------
fit.M1 <- lm(Rendimiento ~ Concentracion + Catalizador + Temperatura + Temperatura:Catalizador, data = ejer04)
# Estimación del modelo ajustado
tab_model(fit.M1, wrap.labels = 150,
          show.r2 = FALSE, 
          show.p = FALSE)
  Rendimiento
Predictors Estimates CI
(Intercept) 71.95 68.87 – 75.03
Concentracion.L 7.12 4.61 – 9.63
Concentracion.Q -2.89 -5.40 – -0.38
Concentracion.C -2.10 -4.61 – 0.41
Catalizador [C2] 1.01 -3.34 – 5.36
Catalizador [C3] 2.20 -2.15 – 6.55
Temperatura [T2] -0.70 -5.05 – 3.65
Catalizador [C2] * Temperatura [T2] 8.63 2.48 – 14.79
Catalizador [C3] * Temperatura [T2] 8.98 2.83 – 15.14
Observations 72

Diagnóstico

# Valores de diagnóstico
diagnostico <- fortify(fit.M1)

# Tests de hipótesis
ols_test_normality(fit.M1)
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.9757         0.1764 
## Kolmogorov-Smirnov        0.0689         0.8609 
## Cramer-von Mises          4.8872         0.0000 
## Anderson-Darling          0.3672         0.4227 
## -----------------------------------------------
leveneTest(.stdresid ~ Concentracion*Catalizador*Temperatura, data = diagnostico)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group 23  0.4794 0.9707
##       48
ols_plot_cooksd_chart(fit.M1)

# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = c("Concentracion", "Catalizador", "Temperatura"),
           show.stat = TRUE, 
           title ="")
# Tabla de estimaciones
p$data
## 
## # Predicted values of Rendimiento
## # x = Concentracion
## 
## # Catalizador = C1
## # Temperatura = T1
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     66.20 | 1.89 |        C1 | [62.50, 69.90]
## 2 |     70.39 | 1.89 |        C1 | [66.70, 74.09]
## 3 |     76.39 | 1.89 |        C1 | [72.70, 80.09]
## 4 |     74.81 | 1.89 |        C1 | [71.11, 78.51]
## 
## # Catalizador = C2
## # Temperatura = T1
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     67.21 | 1.89 |        C2 | [63.51, 70.90]
## 2 |     71.40 | 1.89 |        C2 | [67.71, 75.10]
## 3 |     77.40 | 1.89 |        C2 | [73.71, 81.10]
## 4 |     75.82 | 1.89 |        C2 | [72.12, 79.52]
## 
## # Catalizador = C3
## # Temperatura = T1
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     68.40 | 1.89 |        C3 | [64.70, 72.10]
## 2 |     72.59 | 1.89 |        C3 | [68.90, 76.29]
## 3 |     78.59 | 1.89 |        C3 | [74.90, 82.29]
## 4 |     77.01 | 1.89 |        C3 | [73.31, 80.71]
## 
## # Catalizador = C1
## # Temperatura = T2
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     65.50 | 1.89 |        C1 | [61.80, 69.20]
## 2 |     69.69 | 1.89 |        C1 | [66.00, 73.39]
## 3 |     75.69 | 1.89 |        C1 | [72.00, 79.39]
## 4 |     74.11 | 1.89 |        C1 | [70.41, 77.81]
## 
## # Catalizador = C2
## # Temperatura = T2
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     75.14 | 1.89 |        C2 | [71.45, 78.84]
## 2 |     79.34 | 1.89 |        C2 | [75.64, 83.03]
## 3 |     85.34 | 1.89 |        C2 | [81.64, 89.03]
## 4 |     83.75 | 1.89 |        C2 | [80.06, 87.45]
## 
## # Catalizador = C3
## # Temperatura = T2
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     76.68 | 1.89 |        C3 | [72.99, 80.38]
## 2 |     80.88 | 1.89 |        C3 | [77.18, 84.57]
## 3 |     86.88 | 1.89 |        C3 | [83.18, 90.57]
## 4 |     85.29 | 1.89 |        C3 | [81.60, 88.99]
# Gráfico
p

Ejercicio 6

densidad <- c(21.8, 21.9, 21.7, 21.6, 21.7, 21.7, 21.4, 21.5, 21.4,
              21.9, 21.8, 21.8, 21.6, 21.5, 21.9, 22.1, 21.85, 21.9)
temperatura <- c(rep("T100", 5), rep("T125", 4), rep("T150", 5),
                 rep("T175", 4))
ejer06 <- data.frame(densidad, temperatura)
ejer06$temperatura <- as.ordered(ejer06$temperatura)
str(ejer06)
## 'data.frame':    18 obs. of  2 variables:
##  $ densidad   : num  21.8 21.9 21.7 21.6 21.7 21.7 21.4 21.5 21.4 21.9 ...
##  $ temperatura: Ord.factor w/ 4 levels "T100"<"T125"<..: 1 1 1 1 1 2 2 2 2 3 ...

Análisis preliminar

ggplot(ejer06, aes(x = temperatura, y = densidad)) + geom_boxplot()

Selección del modelo

fit.M1 <- lm(densidad ~ temperatura, data = ejer06)
ols_step_backward_p(fit.M1, prem = 0.05)
## [1] "No variables have been removed from the model."

Estimación

# Estimación del modelo ajustado
tab_model(fit.M1, 
          show.r2 = FALSE, 
          show.p = FALSE)
  densidad
Predictors Estimates CI
(Intercept) 21.72 21.66 – 21.79
temperatura.L 0.18 0.04 – 0.32
temperatura.Q 0.23 0.09 – 0.37
temperatura.C -0.10 -0.24 – 0.03
Observations 18
# Comparaciones múltiples
TukeyHSD(fit.M1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = x)
## 
## $temperatura
##              diff         lwr        upr     p adj
## T125-T100 -0.2400 -0.50410917 0.02410917 0.0807121
## T150-T100 -0.0200 -0.26900452 0.22900452 0.9953076
## T175-T100  0.1975 -0.06660917 0.46160917 0.1785203
## T150-T125  0.2200 -0.04410917 0.48410917 0.1184044
## T175-T125  0.4375  0.15910449 0.71589551 0.0021907
## T175-T150  0.2175 -0.04660917 0.48160917 0.1240769
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)

Diagnóstico

# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = temperatura, y = .stdresid)) + 
   geom_boxplot() +
   geom_hline(yintercept = 0, col = "red")

# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.9464         0.3712 
## Kolmogorov-Smirnov        0.1754         0.6370 
## Cramer-von Mises          4.5765         0.0000 
## Anderson-Darling          0.4192         0.2924 
## -----------------------------------------------
leveneTest(.stdresid ~ temperatura, data = diagnostico)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.2428 0.8651
##       14
ols_plot_cooksd_chart(fit.M1)

Predicción

# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = "temperatura",
           show.stat = TRUE, 
           title ="")
# Tabla de estimaciones
p$data
## 
## # Predicted values of densidad
## # x = temperatura
## 
## x | Predicted |   SE | group_col |         95% CI
## -------------------------------------------------
## 1 |     21.74 | 0.06 |         1 | [21.62, 21.86]
## 2 |     21.50 | 0.07 |         1 | [21.37, 21.63]
## 3 |     21.72 | 0.06 |         1 | [21.60, 21.84]
## 4 |     21.94 | 0.07 |         1 | [21.80, 22.07]
# Gráficos
p