# Explorar las variables de la base de datos
# Load the HATCO data set from an SPSS .sav file.
library(haven)
hatco <- read_sav("D:/OneDrive/FACEN/Hatco.sav")
# str(hatco)
names(hatco)
## [1] "x1" "x2" "x3" "x4" "x5" "x6" "x7" "y"
# View(hatco)

# Descriptive labels for the seven predictors, keyed by column name.
# There is no entry for the response `y`.
descripciones <- c(
  x1 = "X1: Velocidad de entrega",
  x2 = "X2: Nivel de precio",
  x3 = "X3: Flexibilidad de precios",
  x4 = "X4: Imagen del fabricante",
  x5 = "X5: Servicio conjunto",
  x6 = "X6: Imagen de la fuerza de ventas",
  x7 = "X7: Calidad del producto"
)

# Attach each description as the "label" attribute of its column instead of
# overwriting the column names: there are 7 descriptions for 8 columns, and
# the rest of the script addresses the columns as x1..x7 and y.
for (columna in names(descripciones)) {
  attr(hatco[[columna]], "label") <- descripciones[[columna]]
}
names(hatco)
## [1] "x1" "x2" "x3" "x4" "x5" "x6" "x7" "y"
#"X1: Velocidad de entrega"
# x1: numeric summary followed by its boxplot
summary(hatco[["x1"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.500 3.400 3.515 4.600 6.100
boxplot(hatco[["x1"]])
# x2: numeric summary followed by its boxplot
summary(hatco[["x2"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 1.475 2.150 2.364 3.225 5.400
boxplot(hatco[["x2"]])
# Checking the homoscedasticity requirement: side-by-side boxplots of the
# seven predictors x1..x7 in a single figure
with(hatco, boxplot(x1, x2, x3, x4, x5, x6, x7))
# Histograms of x1..x7, drawn three per page (3 rows x 1 column).
# par() returns the settings it replaces; they are restored after the last
# histogram so the 3 x 1 layout does not leak into the plots drawn later.
par_previo <- par(mfrow = c(3, 1))
h1 <- hist(hatco$x1)
h2 <- hist(hatco$x2)
h3 <- hist(hatco$x3)
h4 <- hist(hatco$x4)
h5 <- hist(hatco$x5)
h6 <- hist(hatco$x6)
h7 <- hist(hatco$x7)
par(par_previo)
# Scatter-plot matrix: x1, x2, x3 against y
hatco1 <- subset(hatco, select = c(x1, x2, x3, y))
plot(hatco1)
# Scatter-plot matrix: x4, x5, x6, x7 against y
hatco1 <- subset(hatco, select = c(x4, x5, x6, x7, y))
plot(hatco1)
# Scatter-plot matrix of every variable in the data set
plot(hatco)
# Same matrix via psych::pairs.panels, requesting correlation ellipses,
# light-blue histograms, linear fits and correlation values.
library(psych)
pairs.panels(
  hatco,
  ellipses = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  hist.col = "lightblue",
  lm = TRUE,
  cor = TRUE,
  cex = 1
)
# 03 - Correlation
# Rebuild hatco through data.frame(), so that from here on it is a base
# data.frame rather than the object returned by read_sav().
hatco<-data.frame(hatco)
# Apply the Shapiro-Wilk normality test (shapiro.test) to every numeric
# column of a data frame.
#
# Args:
#   data: a data frame. Columns that are not numeric are skipped, since
#         shapiro.test() only accepts numeric input.
#
# Returns:
#   A data frame with one row per tested column and three columns:
#   Variable (column name), Estadístico (the W statistic) and
#   Valor_p (the p-value). It has zero rows when no column is numeric.
test_shapiro.test <- function(data) {
  es_numerica <- vapply(data, is.numeric, logical(1))
  columnas <- colnames(data)[es_numerica]

  # One single-row data frame per column; they are combined in one rbind()
  # call at the end instead of growing the result inside a loop.
  filas <- lapply(columnas, function(col) {
    test <- shapiro.test(data[[col]])
    data.frame(Variable = col,
               Estadístico = test$statistic,
               Valor_p = test$p.value,
               stringsAsFactors = FALSE)
  })

  # Nothing to test: return an empty result with the usual columns.
  if (length(filas) == 0) {
    return(data.frame(Variable = character(),
                      Estadístico = numeric(),
                      Valor_p = numeric(),
                      stringsAsFactors = FALSE))
  }

  do.call(rbind, filas)
}
library(correlation)
# Shapiro-Wilk test applied to each variable of hatco
print(test_shapiro.test(hatco))
## Variable Estadístico Valor_p
## W x1 0.9854246 0.3406333975
## W1 x2 0.9685102 0.0170193024
## W2 x3 0.9503082 0.0008693206
## W3 x4 0.9818003 0.1831623707
## W4 x5 0.9858613 0.3657020308
## W5 x6 0.9634128 0.0070943660
## W6 x7 0.9713664 0.0281514400
## W7 y 0.9850479 0.3201266472
# Pairwise correlations between all variables of hatco
print(correlation(hatco))
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(98) | p
## --------------------------------------------------------------------
## x1 | x2 | -0.35 | [-0.51, -0.16] | -3.69 | 0.006**
## x1 | x3 | 0.51 | [ 0.35, 0.64] | 5.86 | < .001***
## x1 | x4 | 0.05 | [-0.15, 0.24] | 0.50 | > .999
## x1 | x5 | 0.61 | [ 0.47, 0.72] | 7.66 | < .001***
## x1 | x6 | 0.08 | [-0.12, 0.27] | 0.77 | > .999
## x1 | x7 | -0.48 | [-0.62, -0.32] | -5.46 | < .001***
## x1 | y | 0.68 | [ 0.55, 0.77] | 9.09 | < .001***
## x2 | x3 | -0.49 | [-0.62, -0.32] | -5.52 | < .001***
## x2 | x4 | 0.27 | [ 0.08, 0.44] | 2.80 | 0.092
## x2 | x5 | 0.51 | [ 0.35, 0.64] | 5.92 | < .001***
## x2 | x6 | 0.19 | [-0.01, 0.37] | 1.88 | 0.572
## x2 | x7 | 0.47 | [ 0.30, 0.61] | 5.27 | < .001***
## x2 | y | 0.08 | [-0.12, 0.27] | 0.81 | > .999
## x3 | x4 | -0.12 | [-0.31, 0.08] | -1.16 | > .999
## x3 | x5 | 0.07 | [-0.13, 0.26] | 0.66 | > .999
## x3 | x6 | -0.03 | [-0.23, 0.16] | -0.34 | > .999
## x3 | x7 | -0.45 | [-0.59, -0.28] | -4.96 | < .001***
## x3 | y | 0.56 | [ 0.41, 0.68] | 6.67 | < .001***
## x4 | x5 | 0.30 | [ 0.11, 0.47] | 3.10 | 0.041*
## x4 | x6 | 0.79 | [ 0.70, 0.85] | 12.68 | < .001***
## x4 | x7 | 0.20 | [ 0.00, 0.38] | 2.02 | 0.507
## x4 | y | 0.22 | [ 0.03, 0.40] | 2.28 | 0.299
## x5 | x6 | 0.24 | [ 0.05, 0.42] | 2.46 | 0.205
## x5 | x7 | -0.06 | [-0.25, 0.14] | -0.55 | > .999
## x5 | y | 0.70 | [ 0.58, 0.79] | 9.72 | < .001***
## x6 | x7 | 0.18 | [-0.02, 0.36] | 1.78 | 0.621
## x6 | y | 0.26 | [ 0.06, 0.43] | 2.62 | 0.142
## x7 | y | -0.19 | [-0.37, 0.00] | -1.94 | 0.551
##
## p-value adjustment method: Holm (1979)
## Observations: 100
# Full model: linear regression of y on the seven predictors x1..x7.
modelo <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = hatco)
# Print the coefficient table and overall fit statistics.
summary(modelo)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = hatco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9589 -1.9284 0.5978 2.8182 6.7565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.18687 4.97678 -2.047 0.0435 *
## x1 -0.05758 2.01266 -0.029 0.9772
## x2 -0.69691 2.09017 -0.333 0.7396
## x3 3.36822 0.41123 8.191 1.44e-12 ***
## x4 -0.04220 0.66681 -0.063 0.9497
## x5 8.36914 3.91815 2.136 0.0353 *
## x6 1.28067 0.94717 1.352 0.1797
## x7 0.56693 0.35543 1.595 0.1141
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.424 on 92 degrees of freedom
## Multiple R-squared: 0.7749, Adjusted R-squared: 0.7578
## F-statistic: 45.25 on 7 and 92 DF, p-value: < 2.2e-16
# Model 1 (M1): linear regression of y on x2..x7, i.e. x1 is left out.
modelo1 <- lm(y ~ x2 + x3 + x4 + x5 + x6 + x7, data = hatco)
# Print the coefficient table and overall fit statistics.
summary(modelo1)
##
## Call:
## lm(formula = y ~ x2 + x3 + x4 + x5 + x6 + x7, data = hatco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9606 -1.9296 0.5967 2.7858 6.7584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.21610 4.84455 -2.109 0.0377 *
## x2 -0.63969 0.60400 -1.059 0.2923
## x3 3.36843 0.40895 8.237 1.08e-12 ***
## x4 -0.03936 0.65585 -0.060 0.9523
## x5 8.25956 0.82176 10.051 < 2e-16 ***
## x6 1.27904 0.94037 1.360 0.1771
## x7 0.56677 0.35347 1.603 0.1122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.4 on 93 degrees of freedom
## Multiple R-squared: 0.7749, Adjusted R-squared: 0.7604
## F-statistic: 53.37 on 6 and 93 DF, p-value: < 2.2e-16
# Model 2 (M2): linear regression of y on x1, x2, x4..x7, i.e. x3 is left out.
modelo2 <- lm(y ~ x1+x2 + x4 + x5 + x6 + x7, data = hatco)
# Print the coefficient table and overall fit statistics.
summary(modelo2)
##
## Call:
## lm(formula = y ~ x1 + x2 + x4 + x5 + x6 + x7, data = hatco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2891 -3.4401 0.2615 4.7243 10.2024
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.9959 4.5445 4.180 6.58e-05 ***
## x1 -0.3585 2.6319 -0.136 0.8920
## x2 -3.3523 2.7006 -1.241 0.2176
## x4 -0.5618 0.8682 -0.647 0.5191
## x5 11.2980 5.1031 2.214 0.0293 *
## x6 1.9416 1.2343 1.573 0.1191
## x7 0.1604 0.4603 0.348 0.7283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.786 on 93 degrees of freedom
## Multiple R-squared: 0.6108, Adjusted R-squared: 0.5857
## F-statistic: 24.33 on 6 and 93 DF, p-value: < 2.2e-16
# Stepwise selection starting from the full model `modelo`; with
# direction = "both" terms may be dropped and re-added at each step.
modelo_step <- step(modelo, direction = "both") # direction takes one value: "both", "backward" or "forward"
## Start: AIC=305.06
## y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7
##
## Df Sum of Sq RSS AIC
## - x1 1 0.02 1800.3 303.06
## - x4 1 0.08 1800.4 303.06
## - x2 1 2.18 1802.5 303.18
## - x6 1 35.78 1836.1 305.02
## <none> 1800.3 305.06
## - x7 1 49.79 1850.1 305.78
## - x5 1 89.28 1889.6 307.90
## - x3 1 1312.76 3113.1 357.82
##
## Step: AIC=303.06
## y ~ x2 + x3 + x4 + x5 + x6 + x7
##
## Df Sum of Sq RSS AIC
## - x4 1 0.07 1800.4 301.06
## - x2 1 21.71 1822.1 302.25
## - x6 1 35.81 1836.2 303.03
## <none> 1800.3 303.06
## - x7 1 49.77 1850.1 303.78
## + x1 1 0.02 1800.3 305.06
## - x3 1 1313.37 3113.7 355.84
## - x5 1 1955.68 3756.0 374.59
##
## Step: AIC=301.06
## y ~ x2 + x3 + x5 + x6 + x7
##
## Df Sum of Sq RSS AIC
## - x2 1 21.80 1822.2 300.26
## <none> 1800.4 301.06
## - x7 1 49.70 1850.1 301.78
## + x4 1 0.07 1800.3 303.06
## + x1 1 0.01 1800.4 303.06
## - x6 1 81.13 1881.5 303.47
## - x3 1 1326.75 3127.2 354.27
## - x5 1 1987.41 3787.8 373.44
##
## Step: AIC=300.26
## y ~ x3 + x5 + x6 + x7
##
## Df Sum of Sq RSS AIC
## - x7 1 31.09 1853.3 299.96
## <none> 1822.2 300.26
## + x2 1 21.80 1800.4 301.06
## + x1 1 19.71 1802.5 301.18
## + x4 1 0.16 1822.1 302.25
## - x6 1 84.06 1906.3 302.77
## - x3 1 1939.30 3761.5 370.74
## - x5 1 3068.33 4890.5 396.99
##
## Step: AIC=299.96
## y ~ x3 + x5 + x6
##
## Df Sum of Sq RSS AIC
## <none> 1853.3 299.96
## + x7 1 31.09 1822.2 300.26
## + x2 1 3.19 1850.1 301.78
## + x1 1 2.95 1850.3 301.80
## + x4 1 0.01 1853.3 301.95
## - x6 1 109.19 1962.5 303.68
## - x3 1 2153.64 4006.9 375.06
## - x5 1 3039.11 4892.4 395.03
# Coefficient table and fit statistics of the model kept by step().
summary(modelo_step)
##
## Call:
## lm(formula = y ~ x3 + x5 + x6, data = hatco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5520 -2.1298 0.1947 2.9815 7.5736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.5201 3.2467 -2.008 0.0474 *
## x3 3.3760 0.3196 10.562 <2e-16 ***
## x5 7.6214 0.6074 12.547 <2e-16 ***
## x6 1.4056 0.5910 2.378 0.0194 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.394 on 96 degrees of freedom
## Multiple R-squared: 0.7683, Adjusted R-squared: 0.7611
## F-statistic: 106.1 on 3 and 96 DF, p-value: < 2.2e-16
# Standardized residuals of the stepwise-selected model
residuos <- rstandard(modelo_step)
# Scatter plot of the standardized residuals against the fitted values
plot(
  x = fitted(modelo_step),
  y = residuos,
  main = "Gráfico de residuos vs. valores ajustados",
  xlab = "Valores ajustados",
  ylab = "Residuos estandarizados"
)
# Dashed red reference line at zero
abline(h = 0, lty = 2, col = "red")
# 08 - Normality of the residuals
# Shapiro-Wilk test on the raw residuals of the stepwise-selected model.
shapiro.test(modelo_step$residuals)
##
## Shapiro-Wilk normality test
##
## data: modelo_step$residuals
## W = 0.9623, p-value = 0.005881
# Second check on the same residuals: Lilliefors (Kolmogorov-Smirnov) test
# from the nortest package.
library(nortest)
lillie.test(modelo_step$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: modelo_step$residuals
## D = 0.079017, p-value = 0.1298