# Librerías y dataset utilizados
library(readr)
library(ggplot2)
library(plotly)
library(corrplot)
library(tseries)
library(forecast)
# Load the dataset from variables.csv; the precio_credito column is read
# with readr's col_number() parser, the remaining columns use readr's
# guessed types.
variables <- read_csv(
  "variables.csv",
  col_types = cols(precio_credito = col_number())
)

# Discard rows that have no value in the `fecha` column.
variables <- variables[!is.na(variables$fecha), ]

# Keep only the numeric columns. vapply() always returns a logical vector
# (one flag per column), whereas sapply() would return an empty list for a
# table with zero columns and break the subsetting.
vars_num <- variables[vapply(variables, is.numeric, logical(1))]
# Regresiones Lineales Simples y sus respectivas gráficas
# Prueba 1
# Simple linear regression of matricula_total on incidentes, followed by
# the usual coefficient / fit summary.
prueba1 <- lm(formula = matricula_total ~ incidentes, data = variables)
summary(prueba1)
##
## Call:
## lm(formula = matricula_total ~ incidentes, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2414.2 -586.9 -134.5 613.9 2266.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8723.50 500.68 17.423 1.03e-12 ***
## incidentes 72.99 16.96 4.303 0.000428 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1191 on 18 degrees of freedom
## Multiple R-squared: 0.5071, Adjusted R-squared: 0.4797
## F-statistic: 18.52 on 1 and 18 DF, p-value: 0.0004278
# Scatter plot of matricula_total against incidentes, overlaid with the
# fitted linear trend (red) and its standard-error band.
# Note: the title literal contains a real line break, so its continuation
# line must stay unindented to keep the rendered title unchanged.
ggplot(variables, aes(incidentes, matricula_total)) +
  geom_point(color = "purple", size = 3) +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  theme_minimal() +
  labs(
    y = "Matrícula Total",
    x = "Incidentes ocurridos dentro del campus",
    title = "Relación entre los incidentes ocurridos en el campus y
la matrícula total"
  )

# Prueba 2
# Simple linear regression of matricula_total on precio_credito, followed
# by the coefficient / fit summary.
prueba2 <- lm(formula = matricula_total ~ precio_credito, data = variables)
summary(prueba2)
##
## Call:
## lm(formula = matricula_total ~ precio_credito, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1559.3 -350.7 15.2 427.7 1139.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14645.00 448.90 32.624 < 2e-16 ***
## precio_credito -35.89 3.70 -9.699 1.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 679.7 on 18 degrees of freedom
## Multiple R-squared: 0.8394, Adjusted R-squared: 0.8305
## F-statistic: 94.07 on 1 and 18 DF, p-value: 1.427e-08
# Scatter plot of matricula_total against precio_credito, overlaid with
# the fitted linear trend (red) and its standard-error band.
ggplot(variables, aes(precio_credito, matricula_total)) +
  geom_point(color = "steelblue", size = 3) +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  theme_minimal() +
  labs(
    y = "Matrícula",
    x = "Precio del crédito",
    title = "Relación entre precio del crédito y la matrícula"
  )

# Prueba 3
# Simple linear regression of matricula_total on poblacion_15a24, followed
# by the coefficient / fit summary.
prueba3 <- lm(formula = matricula_total ~ poblacion_15a24, data = variables)
summary(prueba3)
##
## Call:
## lm(formula = matricula_total ~ poblacion_15a24, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -920.60 -465.62 31.49 300.68 1503.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.073e+04 2.278e+03 -4.711 0.000174 ***
## poblacion_15a24 4.825e-02 5.152e-03 9.365 2.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 699.9 on 18 degrees of freedom
## Multiple R-squared: 0.8297, Adjusted R-squared: 0.8202
## F-statistic: 87.7 on 1 and 18 DF, p-value: 2.428e-08
# Scatter plot of matricula_total against poblacion_15a24, overlaid with
# the fitted linear trend (red) and its standard-error band.
# theme_minimal() is applied so this figure uses the same theme as the
# other regression plots in this report (it was the only one without it).
# Note: the title literal contains a real line break, so its continuation
# line must stay unindented to keep the rendered title unchanged.
ggplot(
  data = variables,
  aes(x = poblacion_15a24, y = matricula_total)
) +
  geom_point(size = 3, color = "green3") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    title = "Relación entre la población de jóvenes de 15 a 24 años y
la matrícula",
    x = "Población de 15 a 24 años",
    y = "Matrícula"
  ) +
  theme_minimal()

# Prueba 4
# Simple linear regression of matricula_total on cant_concent, followed by
# the coefficient / fit summary.
prueba4 <- lm(formula = matricula_total ~ cant_concent, data = variables)
summary(prueba4)
##
## Call:
## lm(formula = matricula_total ~ cant_concent, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1493.0 -747.6 -130.2 799.4 1661.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29434.2 6376.8 -4.616 0.000215 ***
## cant_concent 666.4 106.2 6.274 6.47e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 950.1 on 18 degrees of freedom
## Multiple R-squared: 0.6862, Adjusted R-squared: 0.6687
## F-statistic: 39.36 on 1 and 18 DF, p-value: 6.468e-06
# Scatter plot of matricula_total against cant_concent, overlaid with the
# fitted linear trend (red) and its standard-error band.
# The y-axis label is spelled "Matrícula Total" (with the accent), matching
# the spelling used for the same axis elsewhere in this report.
ggplot(
  data = variables,
  aes(x = cant_concent, y = matricula_total)
) +
  geom_point(size = 3, color = "gold2") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    title = "Relación entre la cantidad de concentraciones y la matrícula",
    x = "Cantidad de Concentraciones por semestre",
    y = "Matrícula Total"
  ) +
  theme_minimal()

# Gráfica de correlaciones
# Correlation matrix of the numeric columns; "pairwise.complete.obs" means
# each coefficient is computed from the rows where both variables are
# present.
R <- cor(x = vars_num, use = "pairwise.complete.obs")

# Colored lower-triangle correlogram without the diagonal, with the
# coefficients printed in black and the variable labels rotated 45 degrees.
corrplot(
  corr = R,
  type = "lower",
  diag = FALSE,
  method = "color",
  addCoef.col = "black",
  tl.srt = 45,
  tl.col = "black"
)

# Serie de Tiempo
# Turn matricula_total into a time series with two observations per year,
# the first one falling in period 1 of 2015, and print it.
# NOTE(review): the object is named `ts`, the same as the constructor
# function; the name is kept because later code refers to it.
ts <- ts(
  data = variables$matricula_total,
  frequency = 2,
  start = c(2015, 1)
)
print(ts)
## Time Series:
## Start = c(2015, 1)
## End = c(2024, 2)
## Frequency = 2
## [1] 13014 12374 13472 12788 11981 11040 11657 10842 11254 10388 10877 9979
## [13] 10207 9179 9212 8499 8888 8276 8738 8300
# Flatten the series into a plain data frame for ggplot2:
#   tiempo    - time index of each observation, as returned by time()
#   matricula - the observed values
# Both columns are coerced to plain numeric vectors. time() returns a
# ts-classed object, and ggplot2 does not know which scale to pick for that
# class, so the class is dropped here.
df_ts <- data.frame(
  tiempo = as.numeric(time(ts)),
  matricula = as.numeric(ts)
)
# Line chart with point markers of matricula over tiempo.
ggplot(data = df_ts, mapping = aes(x = tiempo, y = matricula)) +
  geom_line(linewidth = 1, color = "skyblue") +
  geom_point(size = 2, color = "skyblue") +
  theme_minimal() +
  labs(
    y = "Matrícula total",
    x = "Año",
    title = "Evolución de la matrícula subgraduada"
  )

# Augmented Dickey-Fuller test on the series, run with adf.test()'s default
# settings.
adf.test(x = ts)
##
## Augmented Dickey-Fuller Test
##
## data: ts
## Dickey-Fuller = -3.7305, Lag order = 2, p-value = 0.04068
## alternative hypothesis: stationary
# Let auto.arima() choose the ARIMA specification for the series, using the
# stepwise search and the approximated fit during model selection.
# TRUE is spelled out instead of T: T is an ordinary variable that can be
# reassigned, which would silently change these arguments.
modelo.arima <- auto.arima(ts, stepwise = TRUE, approximation = TRUE)
print(modelo.arima)
## Series: ts
## ARIMA(0,0,0)(0,1,0)[2] with drift
##
## Coefficients:
## drift
## -231.9444
## s.e. 64.1151
##
## sigma^2 = 313402: log likelihood = -138.92
## AIC=281.85 AICc=282.65 BIC=283.63
# Forecast four periods ahead from the fitted model and print the resulting
# forecast table.
prediccion <- forecast(object = modelo.arima, h = 4)
print(prediccion)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2025.00 8274.111 7556.669 8991.553 7176.878 9371.344
## 2025.50 7836.111 7118.669 8553.553 6738.878 8933.344
## 2026.00 7810.222 6795.606 8824.839 6258.500 9361.944
## 2026.50 7372.222 6357.606 8386.839 5820.500 8923.944
# Plot the forecast object on a minimal theme with a larger base font, a
# bold title and no minor grid lines.
autoplot(prediccion) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  labs(
    y = "Matrícula total",
    x = "Año",
    subtitle = "Modelo ARIMA seleccionado automáticamente",
    title = "Pronóstico de la matrícula subgraduada (2 años)"
  )
