## REGRESIÓN MÚLTIPLE, 1ª PRÁCTICA ANÁLISIS ESSTADÍSTICO AVANZADO ##
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(boot)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:boot':
##
## logit
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(QuantPsyc)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'QuantPsyc'
##
## The following object is masked from 'package:base':
##
## norm
library(ggplot2)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(nortest)
library(corrr)
library(corrplot)
## corrplot 0.92 loaded
library(PerformanceAnalytics)
## Loading required package: xts
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following object is masked from 'package:boot':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(dplyr)
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following object is masked from 'package:psych':
##
## describe
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Cargar libreria.
library(haven)
BBDD_Mundo_95 <- read_sav("C:/Users/javie/OneDrive - COMPARTE MARKETING SOCIAL, S.L/Escritorio/máster MIAS/ANÁLISIS ESTADÍSTICO AVANZADO/BBDD_Mundo 95.sav")
View(BBDD_Mundo_95)
names(BBDD_Mundo_95)
## [1] "país" "poblac" "densidad" "urbana" "relig"
## [6] "espvidaf" "espvidam" "alfabet" "inc_pob" "mortinf"
## [11] "pib_cap" "región" "calorías" "sida" "tasa_nat"
## [16] "tasa_mor" "tasasida" "log_pib" "logtsida" "nac_def"
## [21] "fertilid" "log_pob" "cregrano" "alfabmas" "alfabfem"
## [26] "clima" "espvidafrec"
## Matriz de correlaciones con nuestras variables seleccionadas.
cor_matrix <- cor(BBDD_Mundo_95[c("fertilid", "pib_cap", "alfabfem", "mortinf")], use = "complete.obs")
print("Matriz de Correlación:")
## [1] "Matriz de Correlación:"
matriz_correlacion <- print(cor_matrix)
## fertilid pib_cap alfabfem mortinf
## fertilid 1.0000000 -0.5041522 -0.8389623 0.8088380
## pib_cap -0.5041522 1.0000000 0.4287245 -0.5849956
## alfabfem -0.8389623 0.4287245 1.0000000 -0.8434112
## mortinf 0.8088380 -0.5849956 -0.8434112 1.0000000
## Gráfico que acompaña a la correlación.
cor_matrix <- cor(BBDD_Mundo_95[c("fertilid", "pib_cap", "alfabfem", "mortinf")], use = "complete.obs")
datos <- BBDD_Mundo_95[, c(21, 11, 25, 10)]
pairs(x = datos, lower.panel = NULL)

ggpairs(datos,
lower = list(continuous = "smooth"),
diag = list(continuous = "bar"),
axisLabels = "none")
## Warning in check_and_set_ggpairs_defaults("diag", diag, continuous =
## "densityDiag", : Changing diag$continuous from 'bar' to 'barDiag'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 24 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 24 rows containing missing values
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 24 rows containing missing values
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## A continuación, presentamos un resumen del modelo.
rg_multiple_1 <- lm (fertilid ~ pib_cap + alfabfem + mortinf, BBDD_Mundo_95)
summary(rg_multiple_1)
##
## Call:
## lm(formula = fertilid ~ pib_cap + alfabfem + mortinf, data = BBDD_Mundo_95)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.21830 -0.55838 -0.09492 0.47578 2.16284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.916e+00 7.708e-01 7.674 3.28e-11 ***
## pib_cap -4.127e-05 2.803e-05 -1.472 0.1448
## alfabfem -3.729e-02 6.940e-03 -5.373 7.25e-07 ***
## mortinf 1.335e-02 5.775e-03 2.311 0.0234 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9668 on 81 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.7462, Adjusted R-squared: 0.7368
## F-statistic: 79.37 on 3 and 81 DF, p-value: < 2.2e-16
## Correlación de Pearson:
sqrt(0.7462)
## [1] 0.8638287
## La pendiente va a ser elevada y positiva, va a haber una relación fuerte entre las variables propuestas para el modelo.
## valor de p: muy por debajo de 0,01 por lo que rechazamos la hipótesis nula, por lo que va a existir una relación lineal.
## R cuadrado: 0,7462
## R cuadrado ajustado: 0,7368 (lo que nos viene a decir que cada una de las variables indepndientes suman información útil al modelo, es prácticamnte lo mismo que el R cuadradro)
## Las estimadas que vemos, nos dicen que al aumntar un punto pib_cap, baja la fertilidad
## al aumentar un punto la alfabetización de las mujeres, baja la fertilidad.
## al aumentar un punto la mortalidad infantil, aumenta la fertilidad.
## SUPUESTOS:
# Linealidad:
crPlots(rg_multiple_1)

## Normalidad e independencia de los residuos:
hist(residuals(rg_multiple_1))

lillie.test(residuals(rg_multiple_1))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals(rg_multiple_1)
## D = 0.10961, p-value = 0.01336
dwtest(rg_multiple_1)
##
## Durbin-Watson test
##
## data: rg_multiple_1
## DW = 1.8085, p-value = 0.1816
## alternative hypothesis: true autocorrelation is greater than 0
## A continuación nos apoyamos en más gráficas para seguir anaalizando nuestro modelo multivariante.
plot(rg_multiple_1)




## Evaluación Multicolinealidad del Modelo:
vif(rg_multiple_1)
## pib_cap alfabfem mortinf
## 1.554502 3.542331 4.395441
tolerancia <- 1/vif(rg_multiple_1)
print(tolerancia)
## pib_cap alfabfem mortinf
## 0.6432929 0.2823000 0.2275085
## Detección de casos atípicos y observaciones influyentes
summary(influence.measures(rg_multiple_1))
## Potentially influential observations of
## lm(formula = fertilid ~ pib_cap + alfabfem + mortinf, data = BBDD_Mundo_95) :
##
## dfb.1_ dfb.pb_c dfb.alfb dfb.mrtn dffit cov.r cook.d hat
## 2 0.22 -0.15 -0.15 -0.31 -0.37 1.22_* 0.03 0.17_*
## 4 0.25 0.15 -0.29 -0.16 0.43 0.84_* 0.04 0.03
## 7 -0.04 0.11 0.03 0.04 0.12 1.18_* 0.00 0.12
## 16 -0.36 0.10 0.35 0.31 -0.37 1.28_* 0.03 0.20_*
## 25 -0.11 0.23 0.01 0.09 -0.35 0.82_* 0.03 0.02
## 38 -0.15 0.42 0.08 0.16 0.45 1.39_* 0.05 0.26_*
## 61 0.05 -0.15 -0.03 -0.04 -0.17 1.19_* 0.01 0.12
## 94 0.44 -0.19 -0.39 -0.39 0.49 0.84_* 0.06 0.04
influencePlot(rg_multiple_1)

## StudRes Hat CookD
## 16 -0.7304948 0.20117565 0.03379140
## 25 -2.3849620 0.02081163 0.02856982
## 38 0.7473453 0.26260476 0.04999866
## 52 -1.7853231 0.11504297 0.10086469
## 86 -1.9120941 0.07878879 0.07569220
## 94 2.3480413 0.04161345 0.05668861
## Regresiones simples:
## Variable predictora: PIB per cápita.
rg_pib_per_capita <-lm (fertilid ~ pib_cap, BBDD_Mundo_95)
summary(rg_pib_per_capita)
##
## Call:
## lm(formula = fertilid ~ pib_cap, data = BBDD_Mundo_95)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6565 -1.3002 -0.0888 1.1350 3.6791
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.561e+00 2.023e-01 22.543 < 2e-16 ***
## pib_cap -1.698e-04 2.308e-05 -7.355 4.38e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.553 on 105 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.34, Adjusted R-squared: 0.3338
## F-statistic: 54.1 on 1 and 105 DF, p-value: 4.379e-11
## Normalidad residuos
hist(residuals(rg_pib_per_capita))

lillie.test(residuals(rg_pib_per_capita))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals(rg_pib_per_capita)
## D = 0.086191, p-value = 0.04864
plot(rg_pib_per_capita)




## vARIABLE PREDICTORA: ALFABETIZACIÓN MUJERES
rg_afabetfem <- lm (fertilid ~ alfabfem, BBDD_Mundo_95)
summary(rg_afabetfem)
##
## Call:
## lm(formula = fertilid ~ alfabfem, data = BBDD_Mundo_95)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7175 -0.5207 -0.1281 0.5326 2.6178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.654157 0.287308 26.64 <2e-16 ***
## alfabfem -0.055260 0.003934 -14.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.032 on 83 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.7039, Adjusted R-squared: 0.7003
## F-statistic: 197.3 on 1 and 83 DF, p-value: < 2.2e-16
#Normalidad RESIDUOS.
hist(residuals(rg_afabetfem))

lillie.test(residuals(rg_afabetfem))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals(rg_afabetfem)
## D = 0.093114, p-value = 0.06587
plot(rg_afabetfem)




## Variable predictora: mortalidad infantil
rg_mortinf <- lm (fertilid ~ mortinf, BBDD_Mundo_95)
summary(rg_mortinf)
##
## Call:
## lm(formula = fertilid ~ mortinf, data = BBDD_Mundo_95)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0996 -0.5931 -0.2511 0.4450 3.2260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.779399 0.154290 11.53 <2e-16 ***
## mortinf 0.041541 0.002692 15.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.057 on 105 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.694, Adjusted R-squared: 0.6911
## F-statistic: 238.2 on 1 and 105 DF, p-value: < 2.2e-16
#Normalidad residuos:
hist(residuals(rg_mortinf))

lillie.test(residuals(rg_mortinf))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: residuals(rg_mortinf)
## D = 0.14555, p-value = 7.861e-06