Proyecto Estadistica 1

Paquetes usados

Importar y limpiar los datos

rentabilidadxENTIDADES <- read.csv("C:/2023/Ciclo II/Estadistica1/Proyecto Estadistica/RentabilidadxENTIDADES.csv", skip = 5, dec = ",") %>% remove_empty()

colnames(rentabilidadxENTIDADES) <- c("Mes", "histBAC", "anuBAC", "histBCR", "anuBCR", "histBN", "anuBN", "histCCSS", "anuCCSS", "histIBP", "anuIBP", "histINS", "anuINS", "histPOP", "anuPOP", "histVP", "anuVP")

rentRaw <- rentabilidadxENTIDADES

rentabilidadxENTIDADES <- rentabilidadxENTIDADES %>% clean_names() %>% select(-c(hist_ibp,anu_ibp,hist_ins, anu_ins))

nombres_meses <- c(
  "ene", "feb", "mar", "abr", "may", "jun",
  "jul", "ago", "sep", "oct", "nov", "dic"
)

rentabilidadxENTIDADES <- rentabilidadxENTIDADES %>% mutate(year = as.numeric(paste0("20",substr(mes,5,6))), month = match(substr(mes,1,3), nombres_meses))

rentabilidadxENTIDADES <- rentabilidadxENTIDADES %>% 
  mutate(mes = my(paste0(month, "-", year))) %>% 
  select(-month,-year)

rentabilidadHist <- rentabilidadxENTIDADES %>% 
  select(matches("hist"))
rentabilidadHist <- cbind(rentabilidadxENTIDADES[,1], rentabilidadHist)

rentabilidadAnu <- rentabilidadxENTIDADES %>% 
  select(matches("anu"))
rentabilidadAnu <- cbind(mes = rentabilidadxENTIDADES$mes, rentabilidadAnu)

Analisis descriptivo

# Aplicar una función anónima dentro de sapply para contar datos faltantes por columna
contadorND <- sapply(rentRaw, function(column) sum(column == "ND"))

# Filtrar las columnas con NA > 0
faltantes <- contadorND[contadorND > 0]

# Crear una tabla que muestra la cantidad de datos faltantes por columna (solo las que tienen NA > 0)
tablaND<- data.frame(
  # Columna = names(filtered_missing),
  Fondo = substr(names(faltantes), nchar(names(faltantes)) - 2, nchar(names(faltantes))),
  "Datos Faltantes" = faltantes
)

# Mostrar la tabla de datos faltantes por columna
# print(tablaND)
tablaND %>% xtable(caption = "Cantidad de datos faltantes por fondo") %>% print(include.rownames = F)

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:21:49 2023
\begin{table}[ht]
\centering
\begin{tabular}{lr}
  \hline
Fondo & Datos.Faltantes \\ 
  \hline
IBP & 153 \\ 
  IBP & 153 \\ 
  INS & 129 \\ 
  INS & 129 \\ 
   \hline
\end{tabular}
\caption{Cantidad de datos faltantes por fondo} 
\end{table}

# %>% xtable(digits = rep(3,7), align = rep("l",7)) %>% print(include.rownames = T)

tabla<-function(data, variable){
  mean_temp <- mean(data) %>% round(3)
  median_temp <- median(data) %>% round(3)
  min_temp <- min(data) %>% round(3)
  max_temp <- max(data) %>% round(3)
  std_dev_temp <- sd(data) %>% round(3)
  summary_df <- data.frame(
    Statistic = c("Media", "Mediana", "Mínimo", "Máximo", "Desviación estándar"),
    Value = c(mean_temp, median_temp, min_temp, max_temp, std_dev_temp)
)
  colnames(summary_df)<-c("Estadístico", variable)
  return(summary_df)
}

estBAC <- rentabilidadxENTIDADES$anu_bac %>% tabla("BAC")
estBCR <- rentabilidadxENTIDADES$anu_bcr %>% tabla("BCR")
estBN <- rentabilidadxENTIDADES$anu_bn %>% tabla("BN")
estCCSS <- rentabilidadxENTIDADES$anu_ccss %>% tabla("CCSS")
estPOP <- rentabilidadxENTIDADES$anu_pop %>% tabla("POP")
estVP <- rentabilidadxENTIDADES$anu_vp %>% tabla("VP")
estadisticos <- cbind(estBAC,BCR = estBCR[,2], BN = estBN[,2], CCSS = estCCSS[,2], POP = estPOP[,2], VP = estVP[,2]) %>% t()
colnames(estadisticos) <- as.character(estadisticos[1, ])
estadisticos <- estadisticos[-1, ] %>% t()


estTex <-xtable(estadisticos, digits = rep(3,7), align = rep("l",7), caption = "Fuente: Elaboración propia con datos de SUPEN.")

print(estTex, include.rownames = T)

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:21:49 2023
\begin{table}[ht]
\centering
\begin{tabular}{lllllll}
  \hline
 & BAC & BCR & BN & CCSS & POP & VP \\ 
  \hline
Media &   8.044 &  8.646 &  8.791 &  9.576 &  8.436 &  9.414 \\ 
  Mediana &   8.010 &  8.800 &  8.505 &  9.580 &  8.695 &  8.785 \\ 
  Mínimo & -11.500 & -5.080 & -8.210 & -7.990 & -7.830 & -2.970 \\ 
  Máximo &  21.590 & 17.430 & 19.500 & 20.750 & 21.310 & 21.550 \\ 
  Desviación estándar &   6.184 &  4.401 &  5.053 &  5.818 &  5.558 &  4.707 \\ 
   \hline
\end{tabular}
\caption{Fuente: Elaboración propia con datos de SUPEN.} 
\end{table}

# Seleccionar las columnas 2 a 7
subset_data <- rentabilidadAnu[, 2:7]
colnames(subset_data) <- toupper(substring(colnames(subset_data), first = 5))
# Calcular la matriz de correlación
correlacion <- cor(subset_data)

# Crear una tabla de correlación
tabCorrelacion <- as.data.frame(correlacion)

# Mostrar la tabla de correlación
print(tabCorrelacion)

           BAC       BCR        BN      CCSS       POP        VP
BAC  1.0000000 0.9752659 0.9531038 0.9126427 0.9360208 0.8594007
BCR  0.9752659 1.0000000 0.9641714 0.9301698 0.9372886 0.8891529
BN   0.9531038 0.9641714 1.0000000 0.9111693 0.9232444 0.9258334
CCSS 0.9126427 0.9301698 0.9111693 1.0000000 0.9338930 0.9011447
POP  0.9360208 0.9372886 0.9232444 0.9338930 1.0000000 0.8720936
VP   0.8594007 0.8891529 0.9258334 0.9011447 0.8720936 1.0000000

tabCorrelacion %>% xtable(digits = rep(3,7), align = rep("l",7)) %>% print(include.rownames = T)

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:21:49 2023
\begin{table}[ht]
\centering
\begin{tabular}{lllllll}
  \hline
 & BAC & BCR & BN & CCSS & POP & VP \\ 
  \hline
BAC & 1.000 & 0.975 & 0.953 & 0.913 & 0.936 & 0.859 \\ 
  BCR & 0.975 & 1.000 & 0.964 & 0.930 & 0.937 & 0.889 \\ 
  BN & 0.953 & 0.964 & 1.000 & 0.911 & 0.923 & 0.926 \\ 
  CCSS & 0.913 & 0.930 & 0.911 & 1.000 & 0.934 & 0.901 \\ 
  POP & 0.936 & 0.937 & 0.923 & 0.934 & 1.000 & 0.872 \\ 
  VP & 0.859 & 0.889 & 0.926 & 0.901 & 0.872 & 1.000 \\ 
   \hline
\end{tabular}
\end{table}

Graficos de linea, dispersion y cajas

# Convertir el dataframe a formato largo
rentAnuLong <- rentabilidadAnu %>% pivot_longer(cols = -mes, names_to = "Variable", values_to = "Rendimiento")
rentAnuLong <- rentAnuLong %>% mutate(Variable = toupper(substring(Variable, first = 5)))

# GRAFICO DE LINEA
# Crear el gráfico de líneas
(plotLinRend <- rentAnuLong %>% ggplot(aes(x = mes, y = Rendimiento, color = Variable)) +
  geom_line(size = 1) +
  # labs(title = "Gráfico de Líneas de Rendimientos", x = "Fecha", y = "Rendimiento") +
  labs(x = "Fecha", y = "Rendimiento", caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0)) +
  theme(legend.position = "top") +
  # scale_color_brewer(palette = "Dark2") +
  scale_x_date(date_breaks = "2 year", date_labels = "%Y") + 
  scale_color_viridis(discrete = TRUE, option = "H") +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "right")
  )

pdf("linRend.pdf", width = 10)
plotLinRend
dev.off()

png 
  2

# GRAFICO DE DISPERSION
( plotDispRend <- rentAnuLong %>% ggplot(aes(x = mes, y = Rendimiento, color = Variable)) +
  geom_point(size = 1, alpha = 0.6) +
  # labs(title = "Gráfico de Dispersión de Rendimientos", x = "Mes", y = "Rendimiento") +
  labs(x = "Mes", y = "Rendimiento", caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  scale_color_brewer(palette = "Dark2") +
  scale_x_date(date_breaks = "2 year", date_labels = "%Y") + 
  scale_color_viridis(discrete = TRUE, option = "H") +
  scale_fill_viridis(discrete = TRUE) +
  theme(legend.position = "right")
  )

pdf("dispRend.pdf", width = 10)
plotDispRend
dev.off()

png 
  2

# GRAFICO DE CAJAS
(plotBoxRend <- rentAnuLong %>% ggplot(aes(x = Variable, y = Rendimiento,  fill = Variable)) +
  geom_boxplot(color = "darkgrey", show.legend = F)+#(outlier.shape = NA) +  
  # labs(title = "Gráfico de Caja de Rendimientos", x = "Variable", y = "Rendimiento") +
  labs(x = NULL, y = "Rendimiento", caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  # scale_color_viridis(discrete = TRUE, option = "H") +
  scale_fill_viridis(discrete = TRUE, option = "H") +
  theme(legend.position = "right")
)

pdf("boxRend.pdf", width = 10)
plotBoxRend
dev.off()

png 
  2

Distribución de los rendimientos por fondo

data <- rentabilidadAnu[,-1]

hist_list_ggplot <- lapply(names(data), function(col) {
  ggplot(data, aes_string(x = col, fill = col)) +
    geom_histogram(binwidth = 0.5, fill = "lightblue", color = "black") +
    labs(title = toupper(substring(col, first = 5)), x = "Rendimiento", y = "Cantidad")
})

histArray <- grid.arrange(grobs = hist_list_ggplot, ncol = 2)
caption_text <- "Fuente: Elaboración propia con datos de SUPEN."
grid.text(
  x = 0.5,
  y = 0.01,
  label = caption_text,
  just = "right"
)

histArray

TableGrob (3 x 2) "arrange": 6 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (1-1,2-2) arrange gtable[layout]
3 3 (2-2,1-1) arrange gtable[layout]
4 4 (2-2,2-2) arrange gtable[layout]
5 5 (3-3,1-1) arrange gtable[layout]
6 6 (3-3,2-2) arrange gtable[layout]

pdf("histAnu.pdf", width = 10)
histArray <- grid.arrange(grobs = hist_list_ggplot, ncol = 2)
grid.text(
  x = 0.5,  # Adjust the x-coordinate to center the caption
  y = 0.01,# Adjust the y-coordinate to set the position of the caption
  label = "Fuente: Elaboración propia con datos de SUPEN.",
  just = "right"
)
histArray

TableGrob (3 x 2) "arrange": 6 grobs
  z     cells    name           grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (1-1,2-2) arrange gtable[layout]
3 3 (2-2,1-1) arrange gtable[layout]
4 4 (2-2,2-2) arrange gtable[layout]
5 5 (3-3,1-1) arrange gtable[layout]
6 6 (3-3,2-2) arrange gtable[layout]

dev.off()

png 
  2

Resultados

pdf("acf.pdf")
rentabilidadAnu %>% acf()
dev.off()

png 
  2

adf.test(rentabilidadAnu$anu_bac)


    Augmented Dickey-Fuller Test

data:  rentabilidadAnu$anu_bac
Dickey-Fuller = -4.7553, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

library(tseries)
data <- rentabilidadAnu
resultados <- c()
# Realizar la prueba ADF para cada fondo
for (i in 2:7) {
  # Realizar la prueba ADF
  adf_result <- adf.test(data[, i])
  # Extraer el valor p y el estadístico de la prueba
  p_value <- adf_result$p.value
  test_statistic <- adf_result$statistic
  # Almacenar los resultados
  resultados <- c(resultados, c(p_value, test_statistic))
}

# Crear una tabla LaTeX con los resultados
tabla_latex <- matrix(resultados, ncol = 2, byrow = TRUE)
colnames(tabla_latex) <- c("Valor p", "Estadístico ADF")
rownames(tabla_latex) <- toupper(substring(colnames(data)[2:7], first = 5))

# Imprimir la tabla LaTeX
print(xtable(tabla_latex, caption = "Resultados de la prueba ADF para las columnas 2 a 7 de rentabilidadAnu."), 
      caption.placement = "top", 
      include.rownames = TRUE, 
      include.colnames = TRUE, 
      table.placement = "h")

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:21:54 2023
\begin{table}[h]
\centering
\caption{Resultados de la prueba ADF para las columnas 2 a 7 de rentabilidadAnu.} 
\begin{tabular}{rrr}
  \hline
 & Valor p & Estadístico ADF \\ 
  \hline
BAC & 0.01 & -4.76 \\ 
  BCR & 0.01 & -5.10 \\ 
  BN & 0.01 & -4.42 \\ 
  CCSS & 0.01 & -5.01 \\ 
  POP & 0.01 & -4.04 \\ 
  VP & 0.01 & -4.88 \\ 
   \hline
\end{tabular}
\end{table}

library(stats)
data <- rentabilidadAnu[, 2:7]
# Crear una matriz para almacenar los resultados
num_var <- ncol(data)
resultados <- matrix(NA, ncol = 3, nrow = num_var * (num_var - 1) / 2)
colnames(resultados) <- c("Fondo 1", "Fondo 2", "Valor p KS")
# Realizar la prueba KS para todos los pares de variables
row_idx <- 1
for (i in 1:(num_var - 1)) {
  for (j in (i + 1):num_var) {
    var1 <- data[, i]
    var2 <- data[, j]
    # Realizar la prueba KS
    ks_result <- ks.test(var1, var2)
    # Almacenar los resultados en la matriz
    resultados[row_idx, 1] <- toupper(substring(colnames(data)[i], first = 5))
    resultados[row_idx, 2] <- toupper(substring(colnames(data)[j], first = 5))
    resultados[row_idx, 3] <- round(ks_result$p.value,3)
    row_idx <- row_idx + 1
  }
}
# Ordenar los resultados por valor p de menor a mayor
resultados <- resultados[order(resultados[, 3]), ]
# Imprimir la tabla LaTeX
print(xtable(resultados, caption = "Resultados de la prueba KS para todas las combinaciones de pares de variables (columnas 2 a 7) de rentabilidadAnu."),
      digits = rep(3,3),
      caption.placement = "top", 
      include.rownames = FALSE, 
      include.colnames = TRUE, 
      table.placement = "h")

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:21:55 2023
\begin{table}[h]
\centering
\caption{Resultados de la prueba KS para todas las combinaciones de pares de variables (columnas 2 a 7) de rentabilidadAnu.} 
\begin{tabular}{lll}
  \hline
Fondo 1 & Fondo 2 & Valor p KS \\ 
  \hline
BAC & CCSS & 0 \\ 
  CCSS & POP & 0.001 \\ 
  BN & CCSS & 0.002 \\ 
  BCR & CCSS & 0.005 \\ 
  BAC & VP & 0.013 \\ 
  CCSS & VP & 0.035 \\ 
  POP & VP & 0.047 \\ 
  BN & POP & 0.082 \\ 
  BAC & BN & 0.136 \\ 
  BAC & BCR & 0.172 \\ 
  BCR & POP & 0.172 \\ 
  BCR & VP & 0.172 \\ 
  BN & VP & 0.736 \\ 
  BAC & POP & 0.819 \\ 
  BCR & BN & 0.89 \\ 
   \hline
\end{tabular}
\end{table}

Como el valor p de la prueba dickey fuller es menor a \alpha = 0.05, se puede rechazar la hipotesis nula al 95\% de confianza. Por tanto como la hipotesis alternativa es que los datos se comportan de manera estacional concluimos que este es el caso.

forecast_data <- rentabilidadAnu$anu_bcr %>% ts() %>% auto.arima() %>% forecast()

forecast_data %>% plot()

Regresión Lineal

# Hacer una columna con los números----------------------------------------------
# Asegúrate de que la columna 'fecha' esté en formato de fecha
rentabilidadAnu$mes <- as.Date(rentabilidadAnu$mes)

# Calcula los números de mes secuenciales y crea una nueva columna 'numero_mes'
rentabilidadAnuNorm <- rentabilidadAnu %>% 
  mutate(numero_mes = as.integer(format(mes, "%Y")) * 12 + as.integer(format(mes, "%m")) - (min(as.integer(format(rentabilidadAnu$mes, "%Y")) * 12 + as.integer(format(rentabilidadAnu$mes, "%m"))) - 1))

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_bac ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_bac ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-18.6535  -1.7470   0.0239   2.4965  14.2637 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.021402   0.924415   9.759   <2e-16 ***
numero_mes  -0.010797   0.008858  -1.219    0.225    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.175 on 178 degrees of freedom
Multiple R-squared:  0.008277,  Adjusted R-squared:  0.002706 
F-statistic: 1.486 on 1 and 178 DF,  p-value: 0.2245

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmBac <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_bac)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento Bac")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmBac.pdf", width = 10)
lmBac
dev.off()

png 
  2

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_bcr ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_bcr ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-12.5609  -1.9090  -0.0368   2.5939   9.7009 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.893582   0.651800  15.179   <2e-16 ***
numero_mes  -0.013787   0.006246  -2.207   0.0286 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.354 on 178 degrees of freedom
Multiple R-squared:  0.02664,   Adjusted R-squared:  0.02117 
F-statistic: 4.872 on 1 and 178 DF,  p-value: 0.02857

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmBCR <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_bcr)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento BCR")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmBCR.pdf", width = 10)
lmBCR
dev.off()

png 
  2

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_bn ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_bn ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.5375  -1.7715  -0.4723   2.5950  11.8243 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.43625    0.74510  14.007   <2e-16 ***
numero_mes  -0.01818    0.00714  -2.546   0.0117 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.977 on 178 degrees of freedom
Multiple R-squared:  0.03514,   Adjusted R-squared:  0.02972 
F-statistic: 6.483 on 1 and 178 DF,  p-value: 0.01174

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmBN <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_bn)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento BN")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmBN.pdf", width = 10)
lmBN
dev.off()

png 
  2

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_ccss ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_ccss ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.1572  -2.7702  -0.0616   3.3255  13.0039 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 12.283154   0.841374  14.599  < 2e-16 ***
numero_mes  -0.029918   0.008063  -3.711 0.000276 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.621 on 178 degrees of freedom
Multiple R-squared:  0.0718,    Adjusted R-squared:  0.06659 
F-statistic: 13.77 on 1 and 178 DF,  p-value: 0.0002761

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmCCSS <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_ccss)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento CCSS")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmCCSS.pdf", width = 10)
lmCCSS
dev.off()

png 
  2

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_pop ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_pop ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.9507  -2.3910   0.2368   2.3703  13.1158 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  8.785786   0.833853  10.536   <2e-16 ***
numero_mes  -0.003867   0.007990  -0.484    0.629    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.57 on 178 degrees of freedom
Multiple R-squared:  0.001314,  Adjusted R-squared:  -0.004297 
F-statistic: 0.2342 on 1 and 178 DF,  p-value: 0.629

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmPOP <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_pop)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento POP")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmPOP.pdf", width = 10)
lmPOP
dev.off()

png 
  2

# Ajustar un modelo de regresión lineal
modelo <- lm(anu_vp ~ numero_mes, data = rentabilidadAnuNorm)
# Resumen del modelo
summary(modelo)


Call:
lm(formula = anu_vp ~ numero_mes, data = rentabilidadAnuNorm)

Residuals:
     Min       1Q   Median       3Q      Max 
-10.3772  -2.2796  -0.9573   2.6183  11.3412 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 11.697945   0.678471  17.242  < 2e-16 ***
numero_mes  -0.025239   0.006502  -3.882 0.000146 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.532 on 178 degrees of freedom
Multiple R-squared:  0.07806,   Adjusted R-squared:  0.07288 
F-statistic: 15.07 on 1 and 178 DF,  p-value: 0.0001458

# Crear un gráfico de dispersión y de regresión con ggplot2
(lmVP <- ggplot(rentabilidadAnuNorm, aes(x = mes, y = anu_vp)) +
  geom_point(color = "blue", size = 1) +  # Puntos azules para los datos
  geom_smooth(method = "lm", color = "red", se = FALSE) +  # Línea de regresión
  labs(title = NULL, x = "Fecha", y = "Rendimiento VP")+#, caption = "Fuente: Elaboración propia con datos de SUPEN.")+
  theme(plot.caption = element_text(hjust = 0))+
  scale_x_date(date_breaks = "2 year", date_labels = "%Y")
)

pdf("lmVP.pdf", width = 10)
lmVP
dev.off()

png 
  2

lmList <- list(lmBac, lmBCR, lmBN, lmCCSS, lmPOP, lmVP)

grid.arrange(grobs = lmList, ncol = 2)
caption_text <- "Fuente: Elaboración propia con datos de SUPEN."
grid.text(
  x = 0.5,  # Adjust the x-coordinate to center the caption
  y = 0.01,# Adjust the y-coordinate to set the position of the caption
  label = caption_text,
  just = "right"
)

pdf("regresiones.pdf", width = 10)
grid.arrange(grobs = lmList, ncol = 2)
caption_text <- "Fuente: wElaboración propia con datos de SUPEN."
grid.text(
  x = 0.5,  # Adjust the x-coordinate to center the caption
  y = 0.01,# Adjust the y-coordinate to set the position of the caption
  label = caption_text,
  just = "right"
)
dev.off

function (which = dev.cur()) 
{
    if (which == 1) 
        stop("cannot shut down device 1 (the null device)")
    .External(C_devoff, as.integer(which))
    dev.cur()
}
<bytecode: 0x000001b2b0f0ba80>
<environment: namespace:grDevices>

Prueba de Hipótesis

#Ya tenemos la tabla con los datos de los promedios mensuales
puntos_especificos <- c(139, 177)  # Puntos específicos en el eje x

grafico <- ggplot(data = rentabilidadAnu, aes(x = 1:nrow(rentabilidadAnu), y = anu_bac)) +
  geom_line() +
  geom_vline(xintercept = puntos_especificos, linetype = "dashed", color = "red") +
  labs(title = "Gráfico de Líneas con Líneas Verticales", x = "Índice de Datos", y = "Valor de Datos")

print(grafico)

# Hago un proceso para añadir al dataframe una columna con los datos de cuales son meses de pandemia y cuáles no
# df <- data.frame(datos_numericos = rnorm(180))

etiquetas <- rep("no_pandemia", 180)
etiquetas[133:177] <- "pandemia"

# Agrego dicha columna nueva
rentabilidadAnu$etiqueta_pandemia <- etiquetas

#Test de Varianza F
# vartest <- var.test(anu_bac~etiqueta_pandemia,data = rentabilidadAnu)
# vartest$p.value

#Kolmogotov-Smirnov test
ks.test(anu_bac~etiqueta_pandemia,data = rentabilidadAnu)


    Exact two-sample Kolmogorov-Smirnov test

data:  anu_bac by etiqueta_pandemia
D = 0.31852, p-value = 0.001744
alternative hypothesis: two-sided

#Test Mann-Whitley es una prueba no paramétrica que se utiliza para comparar las medianas de dos grupos independientes.
wilcox.test(anu_bac~etiqueta_pandemia,data = rentabilidadAnu)


    Wilcoxon rank sum test with continuity correction

data:  anu_bac by etiqueta_pandemia
W = 2329.5, p-value = 0.01943
alternative hypothesis: true location shift is not equal to 0

etiquetas <- rep("no_pandemia", 180)
etiquetas[133:177] <- "pandemia"

# Agrego dicha columna nueva
rentabilidadAnu$etiqueta_pandemia <- etiquetas

# Filtra los datos para separar los periodos de pandemia y no pandemia
rentabilidad_pandemia <- rentabilidadAnu %>% filter(etiqueta_pandemia == "pandemia")
rentabilidad_no_pandemia <- rentabilidadAnu %>% filter(etiqueta_pandemia == "no_pandemia")

# rentabilidad_pandemia <- rentabilidad_pandemia %>% mutate(etiqueta_pandemia =  if(any(rentabilidad_pandemia$etiqueta_pandemia == "Pandemia"))) 

# Realiza las pruebas para las columnas 2 a 7
resultados <- data.frame()
for (i in 2:7) {
  # Prueba t.test
  t_test_pandemia <- t.test(rentabilidad_pandemia[, i], rentabilidad_no_pandemia[, i])

  f_test_pandemia <- var.test(rentabilidad_pandemia[, i], rentabilidad_no_pandemia[, i])
  
  # Prueba KS test
  ks_test_pandemia <- ks.test(rentabilidad_pandemia[, i], rentabilidad_no_pandemia[, i])
  
  # Prueba wilcox.test
  wilcox_test_pandemia <- wilcox.test(rentabilidad_pandemia[, i], rentabilidad_no_pandemia[, i])

  # Agrega los resultados a la tabla
  resultados <- rbind(resultados, c(
    Variable =  toupper(substring(colnames(data)[i], first = 5)),
    T_test_Pandemia = t_test_pandemia$p.value,
    f_test_pandemia = f_test_pandemia$p.value,
    KS_test_Pandemia = ks_test_pandemia$p.value,
    Wilcox_test_Pandemia = wilcox_test_pandemia$p.value
  ))
}

colnames(resultados) <- c("Variable", "T_test", "F_test", "KS_test", "Wilcox_test")

# Imprime la tabla de resultados
# print(resultados)

# Imprimir la tabla LaTeX
print(xtable((resultados), caption = "Resultados de las pruebas"),
      caption.placement = "top",
      include.rownames = F,
      include.colnames = T,
      table.placement = "h")

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:22:02 2023
\begin{table}[h]
\centering
\caption{Resultados de las pruebas} 
\begin{tabular}{lllll}
  \hline
Variable & T\_test & F\_test & KS\_test & Wilcox\_test \\ 
  \hline
BCR & 0.744934698034123 & 6.46149800331841e-14 & 0.00174448076939382 & 0.0194257745594994 \\ 
  BN & 0.744131832880989 & 2.57571741713036e-14 & 0.00231543658309863 & 0.044759500767872 \\ 
  CCSS & 0.483126015469428 & 0 & 0.000420718354594674 & 0.0626659517181993 \\ 
  POP & 0.385924425065283 & 6.52795595357247e-11 & 0.0646653060406722 & 0.63663533167511 \\ 
  VP & 0.342651386720267 & 0 & 6.40297707077409e-07 & 0.00197625508108504 \\ 
   & 0.730723143355235 & 7.91619563527135e-05 & 9.4768089373809e-05 & 0.0470959203070443 \\ 
   \hline
\end{tabular}
\end{table}

# Chi cuadrado con lag

# Crear columnas de lag para cada fondo
lag_columns <- lapply(rentabilidadAnu[, 2:7], function(x) c(rep(NA, 5), head(x, -5)))


lagged_rentabilidadAnu <- cbind(rentabilidadAnu[, 2:7], lag_columns)

# Eliminar filas con NA resultantes del rezago
lagged_rentabilidadAnu <- na.omit(lagged_rentabilidadAnu)


cero <- min(rentabilidadAnu[, 2:7]) %>% abs()
lagged_rentabilidadAnu <- lagged_rentabilidadAnu + cero 

# Realizar la prueba de chi-cuadrado
(resultado_chi2 <- chisq.test(lagged_rentabilidadAnu))


    Pearson's Chi-squared test

data:  lagged_rentabilidadAnu
X-squared = 879.04, df = 1914, p-value = 1

# Imprimir los resultados
# print(resultado_chi2)

# En este código, se crean columnas de lag para cada fondo en la matriz "rentabilidadAnu", donde los valores rezagados corresponden a los rendimientos de los cinco meses anteriores. Luego, se realiza la prueba de chi-cuadrado utilizando la función chisq.test en la matriz resultante.
# 
# Recuerda ajustar el código según la estructura exacta de tus datos y la información disponible en la matriz "rentabilidadAnu". Este es un ejemplo básico, y la interpretación de los resultados de la prueba de chi-cuadrado dependerá de la naturaleza específica de tus datos y tu pregunta de investigación.

library(tseries)

d <- 12
data <- rentabilidadAnu[,2:7] %>% ts(frequency = d)

par(mfrow = c(2, 3))
for (i in 1:6) {
  entidad <- toupper(substring(colnames(data)[i], first = 5))
  boxplot(data[,i] ~ cycle(data), xlab = "Mes",
        ylab = paste("Rendimiento", entidad),
        col = plasma(d))
  # boxList[i] <- boxPlot #append(boxList,list())
}

pdf("SeasonalBox.pdf", width = 10)
par(mfrow = c(2, 3))
for (i in 1:6) {
  entidad <- toupper(substring(colnames(data)[i], first = 5))
  boxplot(data[,i] ~ cycle(data), xlab = "Mes",
        ylab = paste("Rendimiento", entidad),
        col = plasma(d))
  # boxList[i] <- boxPlot #append(boxList,list())
}
dev.off()

png 
  2

data <- lag(rentabilidadAnu, 5L) %>% remove_empty()
resultados <- c()
# Realizar la prueba ADF para cada fondo
for (i in 2:7) {
  # Realizar la prueba ADF
  (adf_result <- adf.test(data[, i]))
  adf_result %>% print()
  # Extraer el valor p y el estadístico de la prueba
  p_value <- adf_result$p.value
  test_statistic <- adf_result$statistic
  # Almacenar los resultados
  resultados <- c(resultados, c(p_value, test_statistic))
}


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -3.3874, Lag order = 5, p-value = 0.05879
alternative hypothesis: stationary


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -3.7313, Lag order = 5, p-value = 0.02396
alternative hypothesis: stationary


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -3.2505, Lag order = 5, p-value = 0.08161
alternative hypothesis: stationary


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -4.242, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -3.2693, Lag order = 5, p-value = 0.07847
alternative hypothesis: stationary


    Augmented Dickey-Fuller Test

data:  data[, i]
Dickey-Fuller = -4.0689, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

# Crear una tabla LaTeX con los resultados
tabla_latex <- matrix(resultados, ncol = 2, byrow = TRUE)
colnames(tabla_latex) <- c("Valor p", "Estadístico ADF")
rownames(tabla_latex) <- toupper(substring(colnames(data)[2:7], first = 5))

# Imprimir la tabla LaTeX
print(xtable(tabla_latex, caption = "Resultados de la prueba ADF para las columnas 2 a 7 de rentabilidadAnu tomando un resago de 5"), 
      caption.placement = "top", 
      include.rownames = TRUE, 
      include.colnames = TRUE, 
      table.placement = "h")

% latex table generated in R 4.2.0 by xtable 1.8-4 package
% Sat Nov 18 11:22:02 2023
\begin{table}[h]
\centering
\caption{Resultados de la prueba ADF para las columnas 2 a 7 de rentabilidadAnu tomando un resago de 5} 
\begin{tabular}{rrr}
  \hline
 & Valor p & Estadístico ADF \\ 
  \hline
BAC & 0.06 & -3.39 \\ 
  BCR & 0.02 & -3.73 \\ 
  BN & 0.08 & -3.25 \\ 
  CCSS & 0.01 & -4.24 \\ 
  POP & 0.08 & -3.27 \\ 
  VP & 0.01 & -4.07 \\ 
   \hline
\end{tabular}
\end{table}