Librerías y dataset utilizados

library(readr)
library(ggplot2)
library(plotly)
library(corrplot)
library(tseries)
library(forecast)
# Load the dataset, forcing `precio_credito` to be parsed as a number.
variables <- read_csv(
  "variables.csv",
  col_types = cols(precio_credito = col_number())
)
# Drop the rows that have no value in `fecha`.
variables <- variables[!is.na(variables$fecha), ]
# Keep only the numeric columns. vapply() always yields a logical mask,
# whereas sapply() returns an empty list when there are no columns.
vars_num <- variables[vapply(variables, is.numeric, logical(1))]

Regresiones Lineales Simples y sus respectivas gráficas

# Test 1: simple linear regression of `matricula_total` on `incidentes`.
prueba1 <- lm(formula = matricula_total ~ incidentes, data = variables)
summary(prueba1)
## 
## Call:
## lm(formula = matricula_total ~ incidentes, data = variables)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2414.2  -586.9  -134.5   613.9  2266.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8723.50     500.68  17.423 1.03e-12 ***
## incidentes     72.99      16.96   4.303 0.000428 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1191 on 18 degrees of freedom
## Multiple R-squared:  0.5071, Adjusted R-squared:  0.4797 
## F-statistic: 18.52 on 1 and 18 DF,  p-value: 0.0004278
# Scatter plot of `matricula_total` against `incidentes`, overlaid with the
# fitted linear-model line and its confidence band (se = TRUE).
ggplot(data = variables,
       aes(x = incidentes, y = matricula_total)) +
  geom_point(size = 3, color = "purple") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    # An explicit "\n" keeps the two-line title without embedding the
    # source indentation (and a trailing space) in the rendered text.
    title = paste0(
      "Relación entre los incidentes ocurridos en el campus y\n",
      "la matrícula total"
    ),
    x = "Incidentes ocurridos dentro del campus",
    y = "Matrícula Total"
  ) +
  theme_minimal()

# Test 2: simple linear regression of `matricula_total` on `precio_credito`.
prueba2 <- lm(formula = matricula_total ~ precio_credito, data = variables)
summary(prueba2)
## 
## Call:
## lm(formula = matricula_total ~ precio_credito, data = variables)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1559.3  -350.7    15.2   427.7  1139.3 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    14645.00     448.90  32.624  < 2e-16 ***
## precio_credito   -35.89       3.70  -9.699 1.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 679.7 on 18 degrees of freedom
## Multiple R-squared:  0.8394, Adjusted R-squared:  0.8305 
## F-statistic: 94.07 on 1 and 18 DF,  p-value: 1.427e-08
# Scatter plot of `matricula_total` against `precio_credito`, overlaid with
# the fitted linear-model line and its confidence band.
ggplot(variables, aes(precio_credito, matricula_total)) +
  geom_point(color = "steelblue", size = 3) +
  geom_smooth(color = "red", method = "lm", se = TRUE) +
  ggtitle("Relación entre precio del crédito y la matrícula") +
  xlab("Precio del crédito") +
  ylab("Matrícula") +
  theme_minimal()

# Test 3: simple linear regression of `matricula_total` on `poblacion_15a24`.
prueba3 <- lm(formula = matricula_total ~ poblacion_15a24, data = variables)
summary(prueba3)
## 
## Call:
## lm(formula = matricula_total ~ poblacion_15a24, data = variables)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -920.60 -465.62   31.49  300.68 1503.55 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.073e+04  2.278e+03  -4.711 0.000174 ***
## poblacion_15a24  4.825e-02  5.152e-03   9.365 2.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 699.9 on 18 degrees of freedom
## Multiple R-squared:  0.8297, Adjusted R-squared:  0.8202 
## F-statistic:  87.7 on 1 and 18 DF,  p-value: 2.428e-08
# Scatter plot of `matricula_total` against `poblacion_15a24`, overlaid with
# the fitted linear-model line and its confidence band (se = TRUE).
ggplot(data = variables,
       aes(x = poblacion_15a24, y = matricula_total)) +
  geom_point(size = 3, color = "green3") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    # An explicit "\n" keeps the two-line title without embedding the
    # source indentation (and a trailing space) in the rendered text.
    title = paste0(
      "Relación entre la población de jóvenes de 15 a 24 años y\n",
      "la matrícula"
    ),
    x = "Población de 15 a 24 años",
    y = "Matrícula"
  ) +
  # Same theme as the other regression plots, for a consistent look.
  theme_minimal()

# Test 4: simple linear regression of `matricula_total` on `cant_concent`.
prueba4 <- lm(formula = matricula_total ~ cant_concent, data = variables)
summary(prueba4)
## 
## Call:
## lm(formula = matricula_total ~ cant_concent, data = variables)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1493.0  -747.6  -130.2   799.4  1661.5 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -29434.2     6376.8  -4.616 0.000215 ***
## cant_concent    666.4      106.2   6.274 6.47e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 950.1 on 18 degrees of freedom
## Multiple R-squared:  0.6862, Adjusted R-squared:  0.6687 
## F-statistic: 39.36 on 1 and 18 DF,  p-value: 6.468e-06
# Scatter plot of `matricula_total` against `cant_concent`, overlaid with
# the fitted linear-model line and its confidence band (se = TRUE).
ggplot(data = variables,
       aes(x = cant_concent, y = matricula_total)) +
  geom_point(size = 3, color = "gold2") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    title = "Relación entre la cantidad de concentraciones y la matrícula",
    x = "Cantidad de Concentraciones por semestre",
    # Accent restored so the label matches the spelling used in the
    # other plots.
    y = "Matrícula Total"
  ) +
  theme_minimal()

Gráfica de correlaciones

# Correlation matrix of the numeric columns; each pair of columns uses
# every observation that is complete for that pair.
R <- cor(x = vars_num, use = "pairwise.complete.obs")

# Colour-coded lower triangle with the coefficients printed in black,
# the diagonal hidden and the text labels rotated 45 degrees.
corrplot(
  corr = R,
  type = "lower",
  diag = FALSE,
  method = "color",
  addCoef.col = "black",
  tl.srt = 45,
  tl.col = "black"
)

Serie de Tiempo

# Build a time series from `matricula_total` with two observations per
# year, starting at the first period of 2015.
# NOTE(review): the object name `ts` shadows the function stats::ts(); it
# is kept because later statements refer to it by this name.
ts <- stats::ts(
  data = variables$matricula_total,
  frequency = 2,
  start = c(2015, 1)
)
print(ts)
## Time Series:
## Start = c(2015, 1) 
## End = c(2024, 2) 
## Frequency = 2 
##  [1] 13014 12374 13472 12788 11981 11040 11657 10842 11254 10388 10877  9979
## [13] 10207  9179  9212  8499  8888  8276  8738  8300
# Put the time index and the observed values of the series into a data
# frame so that ggplot2 can draw them.
df_ts <- data.frame(tiempo = time(ts), matricula = as.numeric(ts))

# Line-and-point chart of the series over time.
ggplot(df_ts, aes(tiempo, matricula)) +
  geom_line(linewidth = 1, color = "skyblue") +
  geom_point(size = 2, color = "skyblue") +
  ggtitle("Evolución de la matrícula subgraduada") +
  xlab("Año") +
  ylab("Matrícula total") +
  theme_minimal()

# Augmented Dickey-Fuller test on the series; the alternative hypothesis
# reported by the test is that the series is stationary.
adf.test(x = ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts
## Dickey-Fuller = -3.7305, Lag order = 2, p-value = 0.04068
## alternative hypothesis: stationary
# Let auto.arima() choose the model using its stepwise search and the
# approximation option. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
modelo.arima <- auto.arima(ts, stepwise = TRUE, approximation = TRUE)
print(modelo.arima)
## Series: ts 
## ARIMA(0,0,0)(0,1,0)[2] with drift 
## 
## Coefficients:
##           drift
##       -231.9444
## s.e.    64.1151
## 
## sigma^2 = 313402:  log likelihood = -138.92
## AIC=281.85   AICc=282.65   BIC=283.63
# Forecast 4 periods ahead from the fitted model and show the point
# forecasts together with their 80% and 95% intervals.
prediccion <- forecast(object = modelo.arima, h = 4)
print(prediccion)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2025.00       8274.111 7556.669 8991.553 7176.878 9371.344
## 2025.50       7836.111 7118.669 8553.553 6738.878 8933.344
## 2026.00       7810.222 6795.606 8824.839 6258.500 9361.944
## 2026.50       7372.222 6357.606 8386.839 5820.500 8923.944
# Plot the forecast object with a bold title, a subtitle naming the
# model-selection method, and no minor grid lines.
autoplot(prediccion) +
  ggtitle(
    label = "Pronóstico de la matrícula subgraduada (2 años)",
    subtitle = "Modelo ARIMA seleccionado automáticamente"
  ) +
  xlab("Año") +
  ylab("Matrícula total") +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  )