Base de datos
# Load the birthwt data set from the MASS package and attach the tidyverse.
data(birthwt, package = "MASS")
library(tidyverse)

# Recode the analysis variables: age, lwt and bwt become doubles, smoke and
# race become factors with readable level labels. Descriptive variable labels
# are then attached to bwt, smoke and race.
# NOTE(review): var_labels() is not defined or attached in the visible code --
# presumably sjlabelled::var_labels(); confirm the package is loaded.
birthwt <- birthwt %>%
  mutate(
    age = as.numeric(age),
    lwt = as.numeric(lwt),
    bwt = as.numeric(bwt),
    smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
    race = factor(race, labels = c("White", "African American", "Other"))
  ) %>%
  var_labels(
    bwt = "Birth weight (g)",
    smoke = "Smoking status",
    race = "Race"
  )
state: estado
year: año
cpi: índice de precios al consumidor
pop: población estatal
packpc: número de paquetes consumidos per cápita
income: ingresos personales estatales (total, nominal)
tax: promedio de impuestos especiales estatales, federales y locales para el año fiscal
avgprs: precio promedio durante el año fiscal, incluidos los impuestos sobre las ventas
taxs: impuestos especiales promedio para el año fiscal, incluidos los impuestos sobre las ventas
# Show the column types and first values of the recoded data.
glimpse(birthwt)
## Rows: 189
## Columns: 10
## $ low <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ age <dbl> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18,…
## $ lwt <dbl> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95, …
## $ race <fct> African American, Other, White, White, White, Other, White, Oth…
## $ smoke <fct> Non-smoker, Non-smoker, Smoker, Smoker, Smoker, Non-smoker, Non…
## $ ptl <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ ht <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ui <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, …
## $ ftv <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2, …
## $ bwt <dbl> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 272…
Análisis exploratorio
# Descriptive statistics of birth weight for every race x smoking combination.
# rel_dis() fills the CV column; it is not defined in the visible code
# (presumably pubh::rel_dis() -- confirm).
birthwt %>%
  group_by(race, smoke) %>%
  summarise(
    n = n(),
    Mean = mean(bwt, na.rm = TRUE),
    SD = sd(bwt, na.rm = TRUE),
    Median = median(bwt, na.rm = TRUE),
    CV = rel_dis(bwt)
  )
## # A tibble: 6 x 7
## # Groups: race [3]
## race smoke n Mean SD Median CV
## <fct> <fct> <int> <dbl> <dbl> <dbl> <dbl>
## 1 White Non-smoker 44 3429. 710. 3593 0.207
## 2 White Smoker 52 2827. 626. 2776. 0.222
## 3 African American Non-smoker 16 2854. 621. 2920 0.218
## 4 African American Smoker 10 2504 637. 2381 0.254
## 5 Other Non-smoker 55 2816. 709. 2807 0.252
## 6 Other Smoker 12 2757. 810. 3146. 0.294
# Birth-weight estimate with lower and upper confidence limits for each
# race x smoking group (gen_bst_df() comes from outside this file).
birthwt %>%
  gen_bst_df(bwt ~ race | smoke)
| Birth weight (g) | LowerCI | UpperCI | Race | Smoking status |
| 3.43e+03 | 3.21e+03 | 3.63e+03 | White | Non-smoker |
| 2.83e+03 | 2.67e+03 | 2.99e+03 | White | Smoker |
| 2.85e+03 | 2.58e+03 | 3.16e+03 | African American | Non-smoker |
| 2.5e+03 | 2.1e+03 | 2.86e+03 | African American | Smoker |
| 2.82e+03 | 2.62e+03 | 3.01e+03 | Other | Non-smoker |
| 2.76e+03 | 2.29e+03 | 3.15e+03 | Other | Smoker |
# Bar chart with error bars of birth weight by race, filled by smoking
# status; the fill legend is titled explicitly.
birthwt %>%
  bar_error(bwt ~ race, fill = ~smoke) %>%
  axis_labs() %>%
  gf_labs(fill = "Smoking status:")

Análisis de correlación
library(PerformanceAnalytics)
# Correlation chart of the numeric variables. Columns 2, 3 and 10 of birthwt
# are age, lwt and bwt, so they are selected by name.
chart.Correlation(
  birthwt[, c("age", "lwt", "bwt")],
  histogram = TRUE,
  pch = 19
)

Análisis de datos faltantes
# Count the missing values in each column of birthwt. vapply() pins the result
# to one integer per column, so the return type cannot change with the input
# the way sapply()'s can.
vapply(birthwt, function(x) sum(is.na(x)), integer(1))
## low age lwt race smoke ptl ht ui ftv bwt
## 0 0 0 0 0 0 0 0 0 0
#' Pairwise correlation-test p-values
#'
#' Runs `cor.test()` on every pair of columns of `mat` and collects the
#' p-values in a symmetric matrix.
#'
#' @param mat A numeric matrix, or an object that `as.matrix()` converts to
#'   one; each column is treated as one variable.
#' @param ... Further arguments forwarded to `cor.test()`.
#' @return A square matrix with one row and one column per column of `mat`,
#'   holding the p-value of each pair; the diagonal is 0. Row and column names
#'   are the column names of `mat`.
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0
  # seq_len() gives an empty sequence when there are fewer than two columns;
  # `1:(n - 1)` would count backwards (1, 0) and index a non-existent column.
  for (i in seq_len(max(n - 1L, 0L))) {
    for (j in (i + 1L):n) {
      p.mat[i, j] <- p.mat[j, i] <- cor.test(mat[, i], mat[, j], ...)$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# P-values for the pairwise correlations of age, lwt and bwt
# (columns 2, 3 and 10 of birthwt, selected here by name).
p.mat <- cor.mtest(birthwt[, c("age", "lwt", "bwt")])
library(corrplot)
birthwt.cor <- cor(birthwt[, c("age", "lwt", "bwt")])
# Upper-triangle correlogram of the coefficients; entries whose p-value
# exceeds sig.level are left blank.
corrplot(
  birthwt.cor,
  method = "number",
  type = "upper",
  tl.cex = 0.9,
  number.cex = 0.6,
  order = "hclust",
  diag = FALSE,
  addCoef.col = "black",
  tl.col = "black",
  p.mat = p.mat,
  sig.level = 0.05,
  insig = "blank"
)

Ajuste de un modelo lineal a los datos
# Fit a linear model of birth weight on smoking status and race.
# NOTE(review): birthwt.train is not created anywhere in the visible code --
# presumably a training subset of birthwt; confirm where it is defined.
model_norm <- lm(bwt ~ smoke + race, data = birthwt.train)
Diagnósticos del modelo lineal
library(ggfortify)
# Draw the diagnostic plots for the fitted linear model.
autoplot(model_norm)

Resumen del modelo lineal
# Observation-level results of the fit (fitted values, residuals, leverage,
# Cook's distance) as a tibble.
model_norm %>%
  augment() %>%
  as_tibble()
| bwt | smoke | race | .fitted | .resid | .std.resid | .hat | .sigma | .cooksd |
| 3.94e+03 | Non-smoker | Other | 2.91e+03 | 1.03e+03 | 1.69 | 0.0208 | 611 | 0.0152 |
| 3.27e+03 | Non-smoker | Other | 2.91e+03 | 364 | 0.597 | 0.0208 | 617 | 0.00189 |
| 3.77e+03 | Non-smoker | Other | 2.91e+03 | 860 | 1.41 | 0.0208 | 613 | 0.0106 |
| 2.77e+03 | Smoker | White | 2.91e+03 | -139 | -0.229 | 0.019 | 617 | 0.000253 |
| 3.06e+03 | Non-smoker | White | 3.34e+03 | -276 | -0.454 | 0.0221 | 617 | 0.00116 |
| 3.06e+03 | Smoker | White | 2.91e+03 | 154 | 0.252 | 0.019 | 617 | 0.000307 |
| 3.57e+03 | Smoker | Other | 2.48e+03 | 1.09e+03 | 1.81 | 0.0394 | 611 | 0.0336 |
| 2.3e+03 | Non-smoker | Other | 2.91e+03 | -609 | -1 | 0.0208 | 615 | 0.00532 |
| 2.73e+03 | Non-smoker | Other | 2.91e+03 | -177 | -0.291 | 0.0208 | 617 | 0.000451 |
| 2.95e+03 | Smoker | White | 2.91e+03 | 39.6 | 0.065 | 0.019 | 618 | 2.04e-05 |
| 2.66e+03 | Smoker | White | 2.91e+03 | -245 | -0.403 | 0.019 | 617 | 0.000784 |
| 2.47e+03 | Smoker | White | 2.91e+03 | -442 | -0.726 | 0.019 | 616 | 0.00255 |
| 2.66e+03 | Smoker | White | 2.91e+03 | -243 | -0.399 | 0.019 | 617 | 0.000771 |
| 2.08e+03 | Smoker | White | 2.91e+03 | -824 | -1.35 | 0.019 | 614 | 0.00885 |
| 2.84e+03 | Non-smoker | Other | 2.91e+03 | -75.5 | -0.124 | 0.0208 | 617 | 8.15e-05 |
| 2.06e+03 | Non-smoker | Other | 2.91e+03 | -855 | -1.4 | 0.0208 | 613 | 0.0105 |
| 2.92e+03 | Smoker | White | 2.91e+03 | 13.6 | 0.0223 | 0.019 | 618 | 2.41e-06 |
| 3.2e+03 | Non-smoker | Other | 2.91e+03 | 293 | 0.48 | 0.0208 | 617 | 0.00122 |
| 1.89e+03 | Non-smoker | Other | 2.91e+03 | -1.02e+03 | -1.67 | 0.0208 | 612 | 0.0148 |
| 2.41e+03 | Smoker | White | 2.91e+03 | -494 | -0.811 | 0.019 | 616 | 0.00318 |
| 3.32e+03 | Non-smoker | African American | 3.02e+03 | 298 | 0.498 | 0.0506 | 617 | 0.0033 |
| 2.75e+03 | Non-smoker | Other | 2.91e+03 | -159 | -0.262 | 0.0208 | 617 | 0.000364 |
| 3.06e+03 | Smoker | White | 2.91e+03 | 154 | 0.252 | 0.019 | 617 | 0.000307 |
| 1.59e+03 | Non-smoker | Other | 2.91e+03 | -1.32e+03 | -2.17 | 0.0208 | 608 | 0.025 |
| 4.05e+03 | Non-smoker | White | 3.34e+03 | 716 | 1.18 | 0.0221 | 615 | 0.00783 |
| 3.86e+03 | Smoker | White | 2.91e+03 | 948 | 1.55 | 0.019 | 612 | 0.0117 |
| 2.44e+03 | Non-smoker | African American | 3.02e+03 | -581 | -0.968 | 0.0506 | 616 | 0.0125 |
| 2.5e+03 | Non-smoker | African American | 3.02e+03 | -524 | -0.873 | 0.0506 | 616 | 0.0102 |
| 3.94e+03 | Smoker | White | 2.91e+03 | 1.03e+03 | 1.69 | 0.019 | 611 | 0.0139 |
| 2.82e+03 | Smoker | White | 2.91e+03 | -87.4 | -0.143 | 0.019 | 617 | 9.94e-05 |
| 3.63e+03 | Smoker | White | 2.91e+03 | 721 | 1.18 | 0.019 | 615 | 0.00676 |
| 1.47e+03 | Non-smoker | Other | 2.91e+03 | -1.44e+03 | -2.36 | 0.0208 | 606 | 0.0295 |
| 2.47e+03 | Smoker | Other | 2.48e+03 | -14.7 | -0.0244 | 0.0394 | 618 | 6.12e-06 |
| 3.94e+03 | Non-smoker | White | 3.34e+03 | 603 | 0.991 | 0.0221 | 615 | 0.00555 |
| 3.76e+03 | Smoker | White | 2.91e+03 | 848 | 1.39 | 0.019 | 613 | 0.00936 |
| 3.1e+03 | Non-smoker | Other | 2.91e+03 | 194 | 0.318 | 0.0208 | 617 | 0.000536 |
| 3.09e+03 | Non-smoker | White | 3.34e+03 | -248 | -0.408 | 0.0221 | 617 | 0.00094 |
| 3.32e+03 | Smoker | Other | 2.48e+03 | 840 | 1.39 | 0.0394 | 613 | 0.0199 |
| 4.59e+03 | Non-smoker | White | 3.34e+03 | 1.25e+03 | 2.06 | 0.0221 | 609 | 0.0241 |
| 3.33e+03 | Smoker | Other | 2.48e+03 | 850 | 1.41 | 0.0394 | 613 | 0.0204 |
| 2.12e+03 | Smoker | Other | 2.48e+03 | -356 | -0.59 | 0.0394 | 617 | 0.00357 |
| 2.13e+03 | Smoker | African American | 2.59e+03 | -463 | -0.774 | 0.0561 | 616 | 0.00891 |
| 2.37e+03 | Smoker | African American | 2.59e+03 | -222 | -0.371 | 0.0561 | 617 | 0.00205 |
| 3.54e+03 | Non-smoker | Other | 2.91e+03 | 634 | 1.04 | 0.0208 | 615 | 0.00574 |
| 2.32e+03 | Non-smoker | Other | 2.91e+03 | -585 | -0.961 | 0.0208 | 616 | 0.0049 |
| 2.08e+03 | Smoker | White | 2.91e+03 | -824 | -1.35 | 0.019 | 614 | 0.00885 |
| 2.45e+03 | Non-smoker | Other | 2.91e+03 | -460 | -0.756 | 0.0208 | 616 | 0.00303 |
| 3.15e+03 | Smoker | White | 2.91e+03 | 239 | 0.391 | 0.019 | 617 | 0.000741 |
| 3.03e+03 | Smoker | White | 2.91e+03 | 125 | 0.204 | 0.019 | 617 | 0.000202 |
| 2.5e+03 | Smoker | Other | 2.48e+03 | 14.3 | 0.0236 | 0.0394 | 618 | 5.73e-06 |
| 3.8e+03 | Non-smoker | White | 3.34e+03 | 461 | 0.757 | 0.0221 | 616 | 0.00324 |
| 3.91e+03 | Non-smoker | White | 3.34e+03 | 574 | 0.943 | 0.0221 | 616 | 0.00503 |
| 1.73e+03 | Non-smoker | Other | 2.91e+03 | -1.18e+03 | -1.94 | 0.0208 | 610 | 0.02 |
| 2.28e+03 | Non-smoker | Other | 2.91e+03 | -628 | -1.03 | 0.0208 | 615 | 0.00565 |
| 2.35e+03 | Non-smoker | White | 3.34e+03 | -985 | -1.62 | 0.0221 | 612 | 0.0148 |
| 3.77e+03 | Non-smoker | Other | 2.91e+03 | 860 | 1.41 | 0.0208 | 613 | 0.0106 |
| 2.84e+03 | Non-smoker | White | 3.34e+03 | -502 | -0.825 | 0.0221 | 616 | 0.00385 |
| 2.59e+03 | Smoker | White | 2.91e+03 | -314 | -0.516 | 0.019 | 617 | 0.00129 |
| 2.3e+03 | Smoker | White | 2.91e+03 | -612 | -1 | 0.019 | 615 | 0.00488 |
| 1.82e+03 | Smoker | White | 2.91e+03 | -1.09e+03 | -1.79 | 0.019 | 611 | 0.0155 |
| 2.42e+03 | Smoker | White | 2.91e+03 | -484 | -0.795 | 0.019 | 616 | 0.00306 |
| 3.4e+03 | Non-smoker | African American | 3.02e+03 | 383 | 0.639 | 0.0506 | 617 | 0.00544 |
| 3.2e+03 | Smoker | Other | 2.48e+03 | 722 | 1.2 | 0.0394 | 615 | 0.0147 |
| 3.86e+03 | Non-smoker | African American | 3.02e+03 | 841 | 1.4 | 0.0506 | 613 | 0.0262 |
| 2.72e+03 | Non-smoker | Other | 2.91e+03 | -188 | -0.309 | 0.0208 | 617 | 0.000508 |
| 3.42e+03 | Non-smoker | White | 3.34e+03 | 77.9 | 0.128 | 0.0221 | 617 | 9.26e-05 |
| 4e+03 | Non-smoker | White | 3.34e+03 | 659 | 1.08 | 0.0221 | 615 | 0.00663 |
| 2.47e+03 | Smoker | White | 2.91e+03 | -442 | -0.726 | 0.019 | 616 | 0.00255 |
| 1.88e+03 | Smoker | White | 2.91e+03 | -1.02e+03 | -1.68 | 0.019 | 612 | 0.0136 |
| 3.65e+03 | Smoker | White | 2.91e+03 | 743 | 1.22 | 0.019 | 614 | 0.00718 |
| 3.7e+03 | Non-smoker | White | 3.34e+03 | 361 | 0.593 | 0.0221 | 617 | 0.00199 |
| 3.3e+03 | Smoker | Other | 2.48e+03 | 822 | 1.36 | 0.0394 | 614 | 0.0191 |
| 2.56e+03 | Smoker | White | 2.91e+03 | -351 | -0.576 | 0.019 | 617 | 0.00161 |
| 2.06e+03 | Non-smoker | Other | 2.91e+03 | -855 | -1.4 | 0.0208 | 613 | 0.0105 |
| 3.37e+03 | Non-smoker | African American | 3.02e+03 | 355 | 0.593 | 0.0506 | 617 | 0.00468 |
| 2.24e+03 | Non-smoker | Other | 2.91e+03 | -670 | -1.1 | 0.0208 | 615 | 0.00643 |
| 3.22e+03 | Non-smoker | Other | 2.91e+03 | 315 | 0.516 | 0.0208 | 617 | 0.00142 |
| 3.54e+03 | Non-smoker | Other | 2.91e+03 | 634 | 1.04 | 0.0208 | 615 | 0.00574 |
| 3.61e+03 | Non-smoker | White | 3.34e+03 | 276 | 0.453 | 0.0221 | 617 | 0.00116 |
| 2.55e+03 | Non-smoker | Other | 2.91e+03 | -359 | -0.59 | 0.0208 | 617 | 0.00185 |
| 3.65e+03 | Non-smoker | White | 3.34e+03 | 313 | 0.514 | 0.0221 | 617 | 0.0015 |
| 3.46e+03 | Non-smoker | White | 3.34e+03 | 121 | 0.199 | 0.0221 | 617 | 0.000223 |
| 2.91e+03 | Smoker | White | 2.91e+03 | -2.39 | -0.00392 | 0.019 | 618 | 7.42e-08 |
| 3.09e+03 | Non-smoker | Other | 2.91e+03 | 180 | 0.295 | 0.0208 | 617 | 0.000461 |
| 3.23e+03 | Non-smoker | Other | 2.91e+03 | 322 | 0.528 | 0.0208 | 617 | 0.00148 |
| 4.11e+03 | Non-smoker | White | 3.34e+03 | 773 | 1.27 | 0.0221 | 614 | 0.00912 |
| 2.88e+03 | Non-smoker | White | 3.34e+03 | -461 | -0.758 | 0.0221 | 616 | 0.00325 |
| 1.94e+03 | Smoker | White | 2.91e+03 | -972 | -1.6 | 0.019 | 612 | 0.0123 |
| 2.38e+03 | Non-smoker | Other | 2.91e+03 | -529 | -0.869 | 0.0208 | 616 | 0.00401 |
| 1.7e+03 | Non-smoker | African American | 3.02e+03 | -1.32e+03 | -2.2 | 0.0506 | 607 | 0.0643 |
| 1.9e+03 | Non-smoker | Other | 2.91e+03 | -1.01e+03 | -1.66 | 0.0208 | 612 | 0.0146 |
| 2.78e+03 | Non-smoker | African American | 3.02e+03 | -241 | -0.401 | 0.0506 | 617 | 0.00214 |
| 2.35e+03 | Smoker | White | 2.91e+03 | -555 | -0.911 | 0.019 | 616 | 0.00402 |
| 2.62e+03 | Non-smoker | Other | 2.91e+03 | -288 | -0.474 | 0.0208 | 617 | 0.00119 |
| 4.17e+03 | Non-smoker | White | 3.34e+03 | 829 | 1.36 | 0.0221 | 614 | 0.0105 |
| 2.75e+03 | Non-smoker | Other | 2.91e+03 | -160 | -0.264 | 0.0208 | 617 | 0.000368 |
| 3.65e+03 | Non-smoker | White | 3.34e+03 | 313 | 0.514 | 0.0221 | 617 | 0.0015 |
| 3.2e+03 | Non-smoker | Other | 2.91e+03 | 293 | 0.48 | 0.0208 | 617 | 0.00122 |
| 3.88e+03 | Smoker | White | 2.91e+03 | 976 | 1.6 | 0.019 | 612 | 0.0124 |
| 3.08e+03 | Non-smoker | White | 3.34e+03 | -258 | -0.424 | 0.0221 | 617 | 0.00102 |
| 3.61e+03 | Non-smoker | White | 3.34e+03 | 276 | 0.453 | 0.0221 | 617 | 0.00116 |
| 3.57e+03 | Non-smoker | Other | 2.91e+03 | 662 | 1.09 | 0.0208 | 615 | 0.00626 |
| 2.22e+03 | Smoker | White | 2.91e+03 | -683 | -1.12 | 0.019 | 615 | 0.00608 |
| 2.08e+03 | Non-smoker | White | 3.34e+03 | -1.26e+03 | -2.06 | 0.0221 | 609 | 0.0241 |
| 3.06e+03 | Non-smoker | White | 3.34e+03 | -276 | -0.454 | 0.0221 | 617 | 0.00116 |
| 2.88e+03 | Non-smoker | Other | 2.91e+03 | -33.5 | -0.055 | 0.0208 | 618 | 1.6e-05 |
| 2.92e+03 | Non-smoker | White | 3.34e+03 | -418 | -0.687 | 0.0221 | 617 | 0.00267 |
| 1.79e+03 | Smoker | White | 2.91e+03 | -1.12e+03 | -1.83 | 0.019 | 610 | 0.0163 |
| 1.93e+03 | Smoker | White | 2.91e+03 | -980 | -1.61 | 0.019 | 612 | 0.0125 |
| 3.23e+03 | Non-smoker | White | 3.34e+03 | -104 | -0.171 | 0.0221 | 617 | 0.000166 |
| 2.21e+03 | Smoker | Other | 2.48e+03 | -270 | -0.447 | 0.0394 | 617 | 0.00205 |
| 3.22e+03 | Non-smoker | Other | 2.91e+03 | 315 | 0.516 | 0.0208 | 617 | 0.00142 |
| 3.26e+03 | Smoker | Other | 2.48e+03 | 779 | 1.29 | 0.0394 | 614 | 0.0171 |
| 2.1e+03 | Non-smoker | White | 3.34e+03 | -1.24e+03 | -2.03 | 0.0221 | 609 | 0.0234 |
| 3.86e+03 | Non-smoker | White | 3.34e+03 | 522 | 0.858 | 0.0221 | 616 | 0.00416 |
| 2.95e+03 | Smoker | African American | 2.59e+03 | 359 | 0.601 | 0.0561 | 617 | 0.00537 |
| 4.15e+03 | Non-smoker | White | 3.34e+03 | 815 | 1.34 | 0.0221 | 614 | 0.0101 |
| 2.98e+03 | Non-smoker | African American | 3.02e+03 | -41.6 | -0.0694 | 0.0506 | 618 | 6.42e-05 |
| 2.6e+03 | Smoker | White | 2.91e+03 | -308 | -0.506 | 0.019 | 617 | 0.00124 |
| 2.38e+03 | Smoker | African American | 2.59e+03 | -208 | -0.348 | 0.0561 | 617 | 0.0018 |
| 3e+03 | Smoker | White | 2.91e+03 | 96.6 | 0.158 | 0.019 | 617 | 0.000122 |
| 3.1e+03 | Non-smoker | White | 3.34e+03 | -238 | -0.391 | 0.0221 | 617 | 0.000866 |
| 3.79e+03 | Non-smoker | African American | 3.02e+03 | 771 | 1.29 | 0.0506 | 614 | 0.022 |
| 2.92e+03 | Smoker | African American | 2.59e+03 | 331 | 0.554 | 0.0561 | 617 | 0.00456 |
| 3.32e+03 | Smoker | White | 2.91e+03 | 409 | 0.67 | 0.019 | 617 | 0.00217 |
| 2.98e+03 | Smoker | White | 2.91e+03 | 68.6 | 0.113 | 0.019 | 618 | 6.13e-05 |
| 4.17e+03 | Non-smoker | White | 3.34e+03 | 836 | 1.37 | 0.0221 | 614 | 0.0107 |
| 3.43e+03 | Smoker | White | 2.91e+03 | 522 | 0.856 | 0.019 | 616 | 0.00354 |
| 3.73e+03 | Non-smoker | White | 3.34e+03 | 390 | 0.641 | 0.0221 | 617 | 0.00232 |
| 2.3e+03 | Smoker | African American | 2.59e+03 | -293 | -0.49 | 0.0561 | 617 | 0.00357 |
| 3.47e+03 | Non-smoker | White | 3.34e+03 | 135 | 0.222 | 0.0221 | 617 | 0.000278 |
| 2.38e+03 | Smoker | African American | 2.59e+03 | -208 | -0.348 | 0.0561 | 617 | 0.0018 |
| 3.06e+03 | Non-smoker | African American | 3.02e+03 | 43.4 | 0.0723 | 0.0506 | 618 | 6.97e-05 |
| 2.52e+03 | Non-smoker | African American | 3.02e+03 | -496 | -0.827 | 0.0506 | 616 | 0.0091 |
| 2.64e+03 | Non-smoker | Other | 2.91e+03 | -273 | -0.449 | 0.0208 | 617 | 0.00107 |
| 3.77e+03 | Non-smoker | White | 3.34e+03 | 432 | 0.71 | 0.0221 | 616 | 0.00285 |
| 2.86e+03 | Non-smoker | Other | 2.91e+03 | -47.5 | -0.078 | 0.0208 | 618 | 3.23e-05 |
| 3.37e+03 | Smoker | White | 2.91e+03 | 466 | 0.764 | 0.019 | 616 | 0.00282 |
| 2.92e+03 | Non-smoker | African American | 3.02e+03 | -98.6 | -0.164 | 0.0506 | 617 | 0.00036 |
| 3.44e+03 | Smoker | African American | 2.59e+03 | 855 | 1.43 | 0.0561 | 613 | 0.0304 |
| 2.41e+03 | Smoker | White | 2.91e+03 | -498 | -0.818 | 0.019 | 616 | 0.00323 |
| 2.78e+03 | Smoker | White | 2.91e+03 | -126 | -0.207 | 0.019 | 617 | 0.000208 |
| 3.18e+03 | Non-smoker | Other | 2.91e+03 | 265 | 0.434 | 0.0208 | 617 | 0.001 |
| 2.41e+03 | Smoker | White | 2.91e+03 | -498 | -0.818 | 0.019 | 616 | 0.00323 |
| 3.27e+03 | Non-smoker | Other | 2.91e+03 | 364 | 0.597 | 0.0208 | 617 | 0.00189 |
| 3.64e+03 | Smoker | White | 2.91e+03 | 729 | 1.2 | 0.019 | 615 | 0.00691 |
| 3.83e+03 | Non-smoker | White | 3.34e+03 | 489 | 0.803 | 0.0221 | 616 | 0.00365 |
| 3.88e+03 | Non-smoker | Other | 2.91e+03 | 974 | 1.6 | 0.0208 | 612 | 0.0136 |
| 3.04e+03 | Smoker | African American | 2.59e+03 | 453 | 0.758 | 0.0561 | 616 | 0.00854 |
| 2.81e+03 | Non-smoker | Other | 2.91e+03 | -103 | -0.17 | 0.0208 | 617 | 0.000153 |
| 3.46e+03 | Non-smoker | White | 3.34e+03 | 122 | 0.2 | 0.0221 | 617 | 0.000227 |
Coeficientes del modelo
# Coefficient table: estimate, standard error, statistic and p-value per term.
model_norm %>%
  tidy()
| term | estimate | std.error | statistic | p.value |
| (Intercept) | 3.34e+03 | 91.5 | 36.5 | 1.58e-75 |
| smokeSmoker | -430 | 108 | -3.99 | 0.000104 |
| raceAfrican American | -320 | 149 | -2.14 | 0.0341 |
| raceOther | -428 | 117 | -3.65 | 0.000367 |
Intervalos de confianza
# Confidence limits (2.5 % and 97.5 %) of the model coefficients as a tibble.
model_norm %>%
  confint() %>%
  as_tibble()
| 2.5 % | 97.5 % |
| 3.16e+03 | 3.52e+03 |
| -643 | -217 |
| -615 | -24.3 |
| -659 | -196 |
# Coefficients with confidence intervals and p-values, using descriptive
# parameter labels built from the model.
model_norm %>%
  glm_coef(labels = model_labels(model_norm))
| Parameter | Coefficient | Pr(>|t|) |
| Constant | 3338.12 (3157.2, 3519.04) | < 0.001 |
| Smoking status: Smoker | -429.74 (-642.58, -216.9) | < 0.001 |
| Race: African American | -319.5 (-614.66, -24.35) | 0.034 |
| Race: Other | -427.65 (-659.34, -195.95) | < 0.001 |
# Same coefficient table with se_rob = TRUE (presumably robust standard
# errors -- confirm against glm_coef()'s documentation).
model_norm %>%
  glm_coef(se_rob = TRUE, labels = model_labels(model_norm))
| Parameter | Coefficient | Pr(>|t|) |
| Constant | 3338.12 (3157.12, 3519.13) | < 0.001 |
| Smoking status: Smoker | -429.74 (-644.83, -214.65) | < 0.001 |
| Race: African American | -319.5 (-587.4, -51.61) | 0.02 |
| Race: Other | -427.65 (-671.48, -183.81) | < 0.001 |
# Plot the model predictions by race and smoking status, without a title.
model_norm %>%
  plot_model("pred", terms = ~ race | smoke, dot.size = 1.5, title = "")

# emmip() plot of the model with race on the x axis and one trace per smoking
# status; the y axis takes bwt's variable label.
emmip(model_norm, smoke ~ race) %>%
  gf_labs(
    y = get_label(birthwt$bwt),
    x = "",
    col = "Smoking status"
  )

Multicolinealidad del modelo
library(regclass)
# Variance inflation factors of the model terms as a tibble.
model_norm %>%
  VIF() %>%
  as_tibble()
| GVIF | Df | GVIF^(1/(2*Df)) |
| 1.12 | 1 | 1.06 |
| 1.12 | 2 | 1.03 |
Residuales del modelo ajustado
# Residuals of model_norm plotted against age, lwt and bwt, each with a
# smoother, then arranged in a single figure.
# The columns are referenced by name instead of birthwt.train[, 2] etc.:
# positional indexing inside aes() produces axis labels such as
# "birthwt.train[, 2]" and does not work when birthwt.train is a tibble
# (`[` then returns a data frame rather than a vector).
# NOTE(review): assumes birthwt.train has birthwt's column layout, where
# columns 2, 3 and 10 are age, lwt and bwt -- confirm.
p1 <- ggplot(birthwt.train, aes(age, residuals(model_norm))) +
  geom_point() +
  geom_smooth(color = "blue")
p2 <- ggplot(birthwt.train, aes(lwt, residuals(model_norm))) +
  geom_point() +
  geom_smooth(color = "blue")
p3 <- ggplot(birthwt.train, aes(bwt, residuals(model_norm))) +
  geom_point() +
  geom_smooth(color = "blue")
library(pdp)
grid.arrange(p1, p2, p3)

Datos atípicos
library(olsrr)
# Cook's distance bar plot for spotting influential observations.
model_norm %>%
  ols_plot_cooksd_bar()

Reajustando el modelo de regresión lineal
Algoritmo paso a paso
# ANOVA table of the model terms as a tidy data frame.
model_norm %>%
  Anova() %>%
  tidy()
| term | sumsq | df | statistic | p.value |
| smoke | 6.03e+06 | 1 | 15.9 | 0.000104 |
| race | 5.46e+06 | 2 | 7.21 | 0.00103 |
| Residuals | 5.57e+07 | 147 | | |
# Coefficient table: estimate, standard error, statistic and p-value per term.
model_norm %>%
  tidy()
| term | estimate | std.error | statistic | p.value |
| (Intercept) | 3.34e+03 | 91.5 | 36.5 | 1.58e-75 |
| smokeSmoker | -430 | 108 | -3.99 | 0.000104 |
| raceAfrican American | -320 | 149 | -2.14 | 0.0341 |
| raceOther | -428 | 117 | -3.65 | 0.000367 |
# Coefficients with confidence intervals and p-values, using descriptive
# parameter labels built from the model.
model_norm %>%
  glm_coef(labels = model_labels(model_norm))
| Parameter | Coefficient | Pr(>|t|) |
| Constant | 3338.12 (3157.2, 3519.04) | < 0.001 |
| Smoking status: Smoker | -429.74 (-642.58, -216.9) | < 0.001 |
| Race: African American | -319.5 (-614.66, -24.35) | 0.034 |
| Race: Other | -427.65 (-659.34, -195.95) | < 0.001 |
# Same coefficient table with se_rob = TRUE (presumably robust standard
# errors -- confirm against glm_coef()'s documentation).
model_norm %>%
  glm_coef(se_rob = TRUE, labels = model_labels(model_norm))
| Parameter | Coefficient | Pr(>|t|) |
| Constant | 3338.12 (3157.12, 3519.13) | < 0.001 |
| Smoking status: Smoker | -429.74 (-644.83, -214.65) | < 0.001 |
| Race: African American | -319.5 (-587.4, -51.61) | 0.02 |
| Race: Other | -427.65 (-671.48, -183.81) | < 0.001 |
Criterios de ajuste del modelo lineal
# One-row summary of model fit (R squared, sigma, AIC, BIC, ...).
model_norm %>%
  glance()
| r.squared | adj.r.squared | sigma | statistic | p.value | df | logLik | AIC | BIC | deviance | df.residual | nobs |
| 0.136 | 0.119 | 615 | 7.73 | 7.88e-05 | 3 | -1.18e+03 | 2.37e+03 | 2.39e+03 | 5.57e+07 | 147 | 151 |
# NOTE(review): attaching MASS after the tidyverse masks dplyr::select().
library(MASS)
# AIC-based stepwise selection starting from model_norm; trace = 0 keeps the
# search from printing its steps.
model_norm_AIC <- stepAIC(model_norm, trace = 0)
# ANOVA table of the selected model as a tidy data frame.
model_norm_AIC %>%
  Anova() %>%
  tidy()
| term | sumsq | df | statistic | p.value |
| smoke | 6.03e+06 | 1 | 15.9 | 0.000104 |
| race | 5.46e+06 | 2 | 7.21 | 0.00103 |
| Residuals | 5.57e+07 | 147 | | |
# One-row summary of fit for the model chosen by stepAIC().
model_norm_AIC %>%
  glance()
| r.squared | adj.r.squared | sigma | statistic | p.value | df | logLik | AIC | BIC | deviance | df.residual | nobs |
| 0.136 | 0.119 | 615 | 7.73 | 7.88e-05 | 3 | -1.18e+03 | 2.37e+03 | 2.39e+03 | 5.57e+07 | 147 | 151 |
Análisis de varianza
# ANOVA table of the model terms as a tidy data frame.
model_norm %>%
  Anova() %>%
  tidy()
| term | sumsq | df | statistic | p.value |
| smoke | 6.03e+06 | 1 | 15.9 | 0.000104 |
| race | 5.46e+06 | 2 | 7.21 | 0.00103 |
| Residuals | 5.57e+07 | 147 | | |
Comparación de criterios
# Compare the AIC of the original model with that of the stepAIC() selection.
AIC(model_norm, model_norm_AIC)
| df | AIC |
| 5 | 2.37e+03 |
| 5 | 2.37e+03 |
Importancia relativa de cada variable
#library(relaimpo)
#calc.relimp(model_norm_AIC, type = c("lmg", "last", "first", "pratt", "betasq"), rela = T)
#boot <- boot.relimp(model_norm, b = 1000, type = c("lmg", "last", "first", "pratt"),
# rank = TRUE, diff = TRUE, rela = TRUE)
#booteval.relimp(boot)
#plot(booteval.relimp(boot,sort=TRUE))