# Librerías
if (!require("dplyr")) install.packages("dplyr")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("rio")) install.packages("rio")
if (!require("ggpubr")) install.packages("ggpubr")
library(dplyr)
library(ggplot2)
library(rio)
library(ggpubr)
# Cargar base de datos
base <- import("https://github.com/azula89/bases/raw/main/iiee_rur_con.xlsx")
# Crear variables
base <- base %>%
mutate(
pct_rural = (rural / n) * 100,
pct_internet = (internet / n) * 100
)
# Análisis descriptivo
summary(base[, c("pct_rural", "pct_internet")])
## pct_rural pct_internet
## Min. : 0.00 Min. : 4.467
## 1st Qu.: 64.52 1st Qu.:11.905
## Median : 87.57 Median :21.721
## Mean : 75.29 Mean :25.939
## 3rd Qu.: 94.77 3rd Qu.:34.222
## Max. :100.00 Max. :87.850
ggplot(base, aes(x = pct_rural)) + geom_histogram(fill = "skyblue", bins = 25)

ggplot(base, aes(x = pct_internet)) + geom_histogram(fill = "salmon", bins = 25)

# Correlación nacional
cor.test(base$pct_rural, base$pct_internet)
##
## Pearson's product-moment correlation
##
## data: base$pct_rural and base$pct_internet
## t = -21.403, df = 194, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8754658 -0.7909086
## sample estimates:
## cor
## -0.8381536
# Gráfico de correlación
ggscatter(base, x = "pct_rural", y = "pct_internet",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson")

# Modelo nacional
modelo <- lm(pct_internet ~ pct_rural, data = base)
summary(modelo)
##
## Call:
## lm(formula = pct_internet ~ pct_rural, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.457 -6.622 -1.015 5.318 42.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.97668 1.98697 33.2 <2e-16 ***
## pct_rural -0.53175 0.02484 -21.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.38 on 194 degrees of freedom
## Multiple R-squared: 0.7025, Adjusted R-squared: 0.701
## F-statistic: 458.1 on 1 and 194 DF, p-value: < 2.2e-16
predict(modelo, data.frame(pct_rural = 60), interval = "confidence")
## fit lwr upr
## 1 34.0717 32.55258 35.59081
ggplot(base, aes(x = pct_rural, y = pct_internet)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red") +
geom_vline(xintercept = 60, linetype = "dashed")

# Filtrado Amazonía
amazonia <- c("AMAZONAS", "LORETO", "MADRE DE DIOS", "SAN MARTIN", "UCAYALI")
base_amz <- base %>% filter(DPTO %in% amazonia)
# Correlación y modelo Amazonía
cor.test(base_amz$pct_rural, base_amz$pct_internet)
##
## Pearson's product-moment correlation
##
## data: base_amz$pct_rural and base_amz$pct_internet
## t = -4.3915, df = 30, p-value = 0.000129
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7997807 -0.3540713
## sample estimates:
## cor
## -0.6255351
modelo_amz <- lm(pct_internet ~ pct_rural, data = base_amz)
summary(modelo_amz)
##
## Call:
## lm(formula = pct_internet ~ pct_rural, data = base_amz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.211 -4.651 -0.742 2.363 35.590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.8206 9.2450 6.362 5.09e-07 ***
## pct_rural -0.4760 0.1084 -4.391 0.000129 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.888 on 30 degrees of freedom
## Multiple R-squared: 0.3913, Adjusted R-squared: 0.371
## F-statistic: 19.28 on 1 and 30 DF, p-value: 0.000129
predict(modelo_amz, data.frame(pct_rural = 85), interval = "confidence")
## fit lwr upr
## 1 18.36083 15.50909 21.21256
# Visualización modelo Amazonía
ggplot(base_amz, aes(x = pct_rural, y = pct_internet)) +
geom_point(color = "darkgreen") +
geom_smooth(method = "lm", color = "orange") +
geom_vline(xintercept = 85, linetype = "dashed")
