# Lectura de datos
ejer01 <- read_csv("https://goo.gl/3L4EtK", col_types = "cd")
str(ejer01)
## tibble [31 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Group : chr [1:31] "H-O" "H-O" "H-O" "H-O" ...
## $ phosphate: num [1:31] 2.3 4.1 4.2 4 4.6 4.6 3.8 5.2 3.1 3.7 ...
## - attr(*, "spec")=
## .. cols(
## .. Group = col_character(),
## .. phosphate = col_double()
## .. )
Análisis preliminar
ggplot(ejer01, aes(x = Group, y = phosphate)) + geom_boxplot()
Selección del modelo
fit.M1 <- lm(phosphate ~ Group, data = ejer01)
ols_step_backward_p(fit.M1, prem = 0.05)
## [1] "No variables have been removed from the model."
Estimación
# Estimación del modelo ajustado
tab_model(fit.M1,
show.r2 = FALSE,
show.p = FALSE)
 | phosphate | |
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 2.78 | 2.44 – 3.13 |
Group [H-O] | 1.16 | 0.67 – 1.66 |
Group [N-O] | 0.65 | 0.11 – 1.20 |
Observations | 31 |
# Comparaciones múltiples
TukeyHSD(fit.M1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = x)
##
## $Group
## diff lwr upr p adj
## H-O-C 1.1621212 0.5642291555 1.7600133 0.0001347
## N-O-C 0.6541667 0.0003963287 1.3079370 0.0498380
## N-O-H-O -0.5079545 -1.1735054769 0.1575964 0.1607135
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)
Diagnóstico
# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = Group, y = .stdresid)) +
geom_boxplot() +
geom_hline(yintercept = 0, col = "red")
# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9691 0.4942
## Kolmogorov-Smirnov 0.0858 0.9764
## Cramer-von Mises 3.2934 0.0000
## Anderson-Darling 0.2831 0.6106
## -----------------------------------------------
leveneTest(.stdresid ~ Group, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.0069 0.3782
## 28
ols_plot_cooksd_chart(fit.M1)
Predicción
# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = "Group",
show.stat = TRUE,
title ="")
# Tabla de estimaciones
p$data
##
## # Predicted values of phosphate
## # x = Group
##
## x | Predicted | SE | group_col | 95% CI
## -----------------------------------------------
## 1 | 3.95 | 0.17 | 1 | [3.60, 4.29]
## 2 | 3.44 | 0.20 | 1 | [3.04, 3.84]
## 3 | 2.78 | 0.17 | 1 | [2.46, 3.11]
# Lectura de datos
ejer02 <- read_csv("https://goo.gl/J2ZKWK", col_types = "dcc")
str(ejer02)
## tibble [100 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Butterfat: num [1:100] 3.74 4.01 3.77 3.78 4.1 4.06 4.27 3.94 4.11 4.25 ...
## $ Bread : chr [1:100] "Ayrshire" "Ayrshire" "Ayrshire" "Ayrshire" ...
## $ Age : chr [1:100] "Mature" "2year" "Mature" "2year" ...
## - attr(*, "spec")=
## .. cols(
## .. Butterfat = col_double(),
## .. Bread = col_character(),
## .. Age = col_character()
## .. )
Análisis preliminar
ggplot(ejer02,aes(x = Age, y = Butterfat, color = Bread)) +
geom_boxplot()
Selección del modelo
fit.M1 <- lm(Butterfat ~ Age*Bread, data = ejer02)
ols_step_backward_p(fit.M1, prem = 0.05)
##
##
## Elimination Summary
## --------------------------------------------------------------------------
## Variable Adj.
## Step Removed R-Square R-Square C(p) AIC RMSE
## --------------------------------------------------------------------------
## 1 Age:Bread 0.6825 0.6656 -1.0316 115.1153 0.4138
## 2 Age 0.6771 0.6635 -1.4515 114.8006 0.4151
## --------------------------------------------------------------------------
fit.M1 <- lm(Butterfat ~ Bread, data = ejer02)
Estimación
# Estimación del modelo ajustado
tab_model(fit.M1,
show.r2 = FALSE,
show.p = FALSE)
 | Butterfat | |
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 4.06 | 3.88 – 4.24 |
Bread [Canadian] | 0.38 | 0.12 – 0.64 |
Bread [Guernsey] | 0.89 | 0.63 – 1.15 |
Bread [Holstein-Fresian] | -0.39 | -0.65 – -0.13 |
Bread [Jersey] | 1.23 | 0.97 – 1.49 |
Observations | 100 |
# Comparaciones múltiples
TukeyHSD(fit.M1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = x)
##
## $Bread
## diff lwr upr p adj
## Canadian-Ayrshire 0.3785 0.01348598 0.74351402 0.0381804
## Guernsey-Ayrshire 0.8900 0.52498598 1.25501402 0.0000000
## Holstein-Fresian-Ayrshire -0.3905 -0.75551402 -0.02548598 0.0297910
## Jersey-Ayrshire 1.2325 0.86748598 1.59751402 0.0000000
## Guernsey-Canadian 0.5115 0.14648598 0.87651402 0.0016669
## Holstein-Fresian-Canadian -0.7690 -1.13401402 -0.40398598 0.0000007
## Jersey-Canadian 0.8540 0.48898598 1.21901402 0.0000000
## Holstein-Fresian-Guernsey -1.2805 -1.64551402 -0.91548598 0.0000000
## Jersey-Guernsey 0.3425 -0.02251402 0.70751402 0.0767002
## Jersey-Holstein-Fresian 1.6230 1.25798598 1.98801402 0.0000000
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)
Diagnóstico
# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = Bread, y = .stdresid)) +
geom_boxplot() +
geom_hline(yintercept = 0, col = "red")
# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.968 0.0157
## Kolmogorov-Smirnov 0.1011 0.2588
## Cramer-von Mises 15.8037 0.0000
## Anderson-Darling 0.986 0.0128
## -----------------------------------------------
leveneTest(.stdresid ~ Bread, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.1914 0.01661 *
## 95
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ols_plot_cooksd_chart(fit.M1)
Box-Cox
MASS::boxcox(fit.M1)
# Creamos la variable transformada
ejer02 <- ejer02 %>% mutate(Butterfatinv = 1/Butterfat)
# Ajustamos el modelo de nuevo
fit.M1.inv <- lm(Butterfatinv ~ Age*Bread, data = ejer02)
# Selección utilizando el test F
ols_step_backward_p(fit.M1.inv, prem = 0.05)
##
##
## Elimination Summary
## ---------------------------------------------------------------------------
## Variable Adj.
## Step Removed R-Square R-Square C(p) AIC RMSE
## ---------------------------------------------------------------------------
## 1 Age:Bread 0.7283 0.7138 -1.1215 -503.6001 0.0188
## 2 Age 0.7225 0.7109 -1.1618 -503.5122 0.0189
## ---------------------------------------------------------------------------
fit.M1.inv <- lm(Butterfatinv ~ Bread, data = ejer02)
# Estimación del modelo ajustado
tab_model(fit.M1.inv,
show.r2 = FALSE,
show.p = FALSE)
 | Butterfatinv | |
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 0.25 | 0.24 – 0.26 |
Bread [Canadian] | -0.02 | -0.03 – -0.01 |
Bread [Guernsey] | -0.04 | -0.06 – -0.03 |
Bread [Holstein-Fresian] | 0.03 | 0.01 – 0.04 |
Bread [Jersey] | -0.06 | -0.07 – -0.04 |
Observations | 100 |
# Comparaciones múltiples
TukeyHSD(fit.M1.inv)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = x)
##
## $Bread
## diff lwr upr p adj
## Canadian-Ayrshire -0.02059248 -0.037175383 -0.004009586 0.0072424
## Guernsey-Ayrshire -0.04341678 -0.059999681 -0.026833884 0.0000000
## Holstein-Fresian-Ayrshire 0.02642525 0.009842347 0.043008144 0.0002392
## Jersey-Ayrshire -0.05609240 -0.072675300 -0.039509503 0.0000000
## Guernsey-Canadian -0.02282430 -0.039407196 -0.006241399 0.0021164
## Holstein-Fresian-Canadian 0.04701773 0.030434832 0.063600629 0.0000000
## Jersey-Canadian -0.03549992 -0.052082816 -0.018917018 0.0000004
## Holstein-Fresian-Guernsey 0.06984203 0.053259129 0.086424927 0.0000000
## Jersey-Guernsey -0.01267562 -0.029258518 0.003907279 0.2179051
## Jersey-Holstein-Fresian -0.08251765 -0.099100546 -0.065934749 0.0000000
plot(TukeyHSD(fit.M1.inv), las=1, tcl = -.3)
Diagnóstico
# Valores de diagnóstico
diagnostico <- fortify(fit.M1.inv)
# Gráfico
ggplot(diagnostico,aes(x = Bread, y = .stdresid)) +
geom_boxplot() +
geom_hline(yintercept = 0, col = "red")
# Tests de hipótesis
ols_test_normality(fit.M1.inv)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9922 0.8364
## Kolmogorov-Smirnov 0.0605 0.8581
## Cramer-von Mises 32.1197 0.0000
## Anderson-Darling 0.2898 0.6062
## -----------------------------------------------
leveneTest(.stdresid ~ Bread, data = diagnostico)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.2311 0.9203
## 95
ols_plot_cooksd_chart(fit.M1.inv)
# Objeto gráfico
p <- plot_model(fit.M1.inv, "pred", terms = "Bread",
show.stat = TRUE,
title ="")
# Tabla de estimaciones
p$data
##
## # Predicted values of Butterfatinv
## # x = Bread
##
## x | Predicted | SE | group_col | 95% CI
## ---------------------------------------------
## 1 | 0.25 | 0 | 1 | [0.24, 0.26]
## 2 | 0.23 | 0 | 1 | [0.22, 0.23]
## 3 | 0.20 | 0 | 1 | [0.20, 0.21]
## 4 | 0.27 | 0 | 1 | [0.27, 0.28]
## 5 | 0.19 | 0 | 1 | [0.18, 0.20]
# Lectura de datos
ejer04 <- read_csv("https://bit.ly/2GhFsl7", col_types = "dccc")
ejer04 <- ejer04 %>%
mutate_if(sapply(ejer04, is.character), as.factor)
str(ejer04)
## tibble [72 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Rendimiento : num [1:72] 72.2 74.4 64.3 62.5 65.8 71.2 69 70.3 68.8 65 ...
## $ Concentracion: Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
## $ Catalizador : Factor w/ 3 levels "C1","C2","C3": 1 1 1 2 2 2 3 3 3 1 ...
## $ Temperatura : Factor w/ 2 levels "T1","T2": 1 1 1 1 1 1 1 1 1 2 ...
Análisis preliminar
ggplot(ejer04, aes(x = Concentracion, y = Rendimiento, color = Catalizador)) +
facet_wrap(vars(Temperatura)) +
geom_boxplot()
ejer04$Concentracion <- as.ordered(ejer04$Concentracion)
ggplot(ejer04, aes(x = Concentracion, y = Rendimiento, color = Catalizador)) +
facet_wrap(vars(Temperatura)) +
geom_boxplot()
Selección del modelo
fit.M1 <- lm(Rendimiento ~ Concentracion*Catalizador*Temperatura, data = ejer04)
ols_step_backward_p(fit.M1, prem = 0.05)
## Note: model has aliased coefficients
## sums of squares computed by model comparison
##
##
## Elimination Summary
## -------------------------------------------------------------------------------------------------------
## Variable Adj.
## Step Removed R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------------------------------------
## 1 Concentracion:Temperatura 0.6931 0.5461 -10.0000 464.1742 5.2593
## 2 Concentracion:Catalizador 0.6931 0.5461 -12.0000 470.1742 5.2593
## 3 Concentracion:Catalizador:Temperatura 0.5857 0.5331 2.8068 455.7893 5.3342
## -------------------------------------------------------------------------------------------------------
fit.M1 <- lm(Rendimiento ~ Concentracion + Catalizador + Temperatura + Temperatura:Catalizador, data = ejer04)
# Estimación del modelo ajustado
tab_model(fit.M1, wrap.labels = 150,
show.r2 = FALSE,
show.p = FALSE)
 | Rendimiento | |
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 71.95 | 68.87 – 75.03 |
Concentracion.L | 7.12 | 4.61 – 9.63 |
Concentracion.Q | -2.89 | -5.40 – -0.38 |
Concentracion.C | -2.10 | -4.61 – 0.41 |
Catalizador [C2] | 1.01 | -3.34 – 5.36 |
Catalizador [C3] | 2.20 | -2.15 – 6.55 |
Temperatura [T2] | -0.70 | -5.05 – 3.65 |
Catalizador [C2] * Temperatura [T2] | 8.63 | 2.48 – 14.79 |
Catalizador [C3] * Temperatura [T2] | 8.98 | 2.83 – 15.14 |
Observations | 72 |
Diagnóstico
# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Tests de hipótesis
ols_test_normality(fit.M1)
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9757 0.1764
## Kolmogorov-Smirnov 0.0689 0.8609
## Cramer-von Mises 4.8872 0.0000
## Anderson-Darling 0.3672 0.4227
## -----------------------------------------------
leveneTest(.stdresid ~ Concentracion*Catalizador*Temperatura, data = diagnostico)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 23 0.4794 0.9707
## 48
ols_plot_cooksd_chart(fit.M1)
# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = c("Concentracion", "Catalizador", "Temperatura"),
show.stat = TRUE,
title ="")
# Tabla de estimaciones
p$data
##
## # Predicted values of Rendimiento
## # x = Concentracion
##
## # Catalizador = C1
## # Temperatura = T1
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 66.20 | 1.89 | C1 | [62.50, 69.90]
## 2 | 70.39 | 1.89 | C1 | [66.70, 74.09]
## 3 | 76.39 | 1.89 | C1 | [72.70, 80.09]
## 4 | 74.81 | 1.89 | C1 | [71.11, 78.51]
##
## # Catalizador = C2
## # Temperatura = T1
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 67.21 | 1.89 | C2 | [63.51, 70.90]
## 2 | 71.40 | 1.89 | C2 | [67.71, 75.10]
## 3 | 77.40 | 1.89 | C2 | [73.71, 81.10]
## 4 | 75.82 | 1.89 | C2 | [72.12, 79.52]
##
## # Catalizador = C3
## # Temperatura = T1
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 68.40 | 1.89 | C3 | [64.70, 72.10]
## 2 | 72.59 | 1.89 | C3 | [68.90, 76.29]
## 3 | 78.59 | 1.89 | C3 | [74.90, 82.29]
## 4 | 77.01 | 1.89 | C3 | [73.31, 80.71]
##
## # Catalizador = C1
## # Temperatura = T2
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 65.50 | 1.89 | C1 | [61.80, 69.20]
## 2 | 69.69 | 1.89 | C1 | [66.00, 73.39]
## 3 | 75.69 | 1.89 | C1 | [72.00, 79.39]
## 4 | 74.11 | 1.89 | C1 | [70.41, 77.81]
##
## # Catalizador = C2
## # Temperatura = T2
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 75.14 | 1.89 | C2 | [71.45, 78.84]
## 2 | 79.34 | 1.89 | C2 | [75.64, 83.03]
## 3 | 85.34 | 1.89 | C2 | [81.64, 89.03]
## 4 | 83.75 | 1.89 | C2 | [80.06, 87.45]
##
## # Catalizador = C3
## # Temperatura = T2
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 76.68 | 1.89 | C3 | [72.99, 80.38]
## 2 | 80.88 | 1.89 | C3 | [77.18, 84.57]
## 3 | 86.88 | 1.89 | C3 | [83.18, 90.57]
## 4 | 85.29 | 1.89 | C3 | [81.60, 88.99]
# Gráfico
p
densidad <- c(21.8, 21.9, 21.7, 21.6, 21.7, 21.7, 21.4, 21.5, 21.4,
21.9, 21.8, 21.8, 21.6, 21.5, 21.9, 22.1, 21.85, 21.9)
temperatura <- c(rep("T100", 5), rep("T125", 4), rep("T150", 5),
rep("T175", 4))
ejer06 <- data.frame(densidad, temperatura)
ejer06$temperatura <- as.ordered(ejer06$temperatura)
str(ejer06)
## 'data.frame': 18 obs. of 2 variables:
## $ densidad : num 21.8 21.9 21.7 21.6 21.7 21.7 21.4 21.5 21.4 21.9 ...
## $ temperatura: Ord.factor w/ 4 levels "T100"<"T125"<..: 1 1 1 1 1 2 2 2 2 3 ...
Análisis preliminar
ggplot(ejer06, aes(x = temperatura, y = densidad)) + geom_boxplot()
Selección del modelo
fit.M1 <- lm(densidad ~ temperatura, data = ejer06)
ols_step_backward_p(fit.M1, prem = 0.05)
## [1] "No variables have been removed from the model."
Estimación
# Estimación del modelo ajustado
tab_model(fit.M1,
show.r2 = FALSE,
show.p = FALSE)
 | densidad | |
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 21.72 | 21.66 – 21.79 |
temperatura.L | 0.18 | 0.04 – 0.32 |
temperatura.Q | 0.23 | 0.09 – 0.37 |
temperatura.C | -0.10 | -0.24 – 0.03 |
Observations | 18 |
# Comparaciones múltiples
TukeyHSD(fit.M1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = x)
##
## $temperatura
## diff lwr upr p adj
## T125-T100 -0.2400 -0.50410917 0.02410917 0.0807121
## T150-T100 -0.0200 -0.26900452 0.22900452 0.9953076
## T175-T100 0.1975 -0.06660917 0.46160917 0.1785203
## T150-T125 0.2200 -0.04410917 0.48410917 0.1184044
## T175-T125 0.4375 0.15910449 0.71589551 0.0021907
## T175-T150 0.2175 -0.04660917 0.48160917 0.1240769
plot(TukeyHSD(fit.M1), las=1, tcl = -.3)
Diagnóstico
# Valores de diagnóstico
diagnostico <- fortify(fit.M1)
# Gráfico
ggplot(diagnostico,aes(x = temperatura, y = .stdresid)) +
geom_boxplot() +
geom_hline(yintercept = 0, col = "red")
# Tests de hipótesis
ols_test_normality(fit.M1)
## Warning in ks.test(y, "pnorm", mean(y), sd(y)): ties should not be present for
## the Kolmogorov-Smirnov test
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9464 0.3712
## Kolmogorov-Smirnov 0.1754 0.6370
## Cramer-von Mises 4.5765 0.0000
## Anderson-Darling 0.4192 0.2924
## -----------------------------------------------
leveneTest(.stdresid ~ temperatura, data = diagnostico)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.2428 0.8651
## 14
ols_plot_cooksd_chart(fit.M1)
Predicción
# Objeto gráfico
p <- plot_model(fit.M1, "pred", terms = "temperatura",
show.stat = TRUE,
title ="")
# Tabla de estimaciones
p$data
##
## # Predicted values of densidad
## # x = temperatura
##
## x | Predicted | SE | group_col | 95% CI
## -------------------------------------------------
## 1 | 21.74 | 0.06 | 1 | [21.62, 21.86]
## 2 | 21.50 | 0.07 | 1 | [21.37, 21.63]
## 3 | 21.72 | 0.06 | 1 | [21.60, 21.84]
## 4 | 21.94 | 0.07 | 1 | [21.80, 22.07]
# Gráficos
p