library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Data ----
# NOTE: the former `rm(list = ls())` was dropped on purpose. Wiping the
# caller's workspace is a side effect a script should not impose, and every
# object used below is (re)created explicitly.
dataset <- read.csv("Oyentes_Mensuales_de_Spotify_LUIS.csv")

# Analysis window: rows from `primera_fila` to the end, first three columns.
# A logical mask is used instead of `209:nrow(dataset)` because that sequence
# runs backwards (and produces NA rows) when the file has fewer rows.
primera_fila <- 209
subdata <- dataset[seq_len(nrow(dataset)) >= primera_fila, 1:3]

# Spreadsheet viewers are only useful in an interactive session.
if (interactive()) {
  View(dataset)
  view(subdata)
}
# Descriptive overview of the response variable plus a scatter plot over time.
#
# Prints summary() of `dependiente` extended with its standard deviation,
# prints a Kolmogorov-Smirnov test of `dependiente` against a normal
# distribution whose mean and sd are the sample estimates, draws a base
# boxplot, and builds a ggplot2 scatter plot of `dependiente` vs
# `independiente`.
#
# Args:
#   dependiente:   numeric vector (response).
#   independiente: vector of the same length; it is mapped to a date x-axis
#                  (scale_x_date), so it is expected to be of class Date.
#
# Returns:
#   The ggplot object for the scatter plot. It is returned visibly, so a
#   top-level call renders it.
descriptivo <- function(dependiente, independiente) {
  library(ggplot2)
  # NOTE(review): nothing in this function calls `car`. It is still attached
  # here in case code elsewhere relies on that side effect -- confirm and drop.
  library(car)

  resumen <- summary(dependiente)
  resumen["Std. Dev"] <- sd(dependiente)
  print(resumen)
  print("")

  print(ks.test(dependiente, "pnorm", mean(dependiente), sd(dependiente)))
  print("")

  # With a single box, bxp() hides the group label unless show.names = TRUE,
  # so without it the `names` argument has no visible effect.
  boxplot(
    dependiente,
    names = c("Oyentes mensuales"),
    show.names = TRUE,
    col = c("skyblue"),
    main = "Distribución de las variables del modelo"
  )

  data <- data.frame(Día = independiente, Oyentes = dependiente)

  # Scatter plot of the response against the date.
  ggplot(data, aes(x = Día, y = Oyentes)) +
    geom_point(color = "red", size = 2, alpha = 0.6) +
    labs(
      title = "Gráfico de Dispersión: Oyentes vs. Día",
      x = "Fecha",
      y = "Oyentes Mensuales (*100)"
    ) +
    scale_x_date(date_breaks = "2 week", date_labels = "%b %Y") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
library(lubridate)
# Use the Spanish time locale; LC_TIME controls how month names are
# formatted (e.g. the "%b" labels used on the date axes).
Sys.setlocale("LC_TIME", "es_ES.UTF-8")
## [1] "es_ES.UTF-8"
# Keep the raw dates as plain text. A factor adds nothing here, and text
# functions applied to factor elements can hand back factors instead of
# strings when no replacement takes place.
independiente <- as.character(subdata$Fecha)

# Lookup table: Spanish three-letter month abbreviation -> two-digit month.
meses <- c(
  "ene" = "01", "feb" = "02", "mar" = "03", "abr" = "04",
  "may" = "05", "jun" = "06", "jul" = "07", "ago" = "08",
  "sep" = "09", "oct" = "10", "nov" = "11", "dic" = "12"
)

# Replace the month abbreviation in dates written as "d mes yyyy" with its
# two-digit number (e.g. "12 ene 2024" -> "12 01 2024").
#
# For each element, the first abbreviation of `meses` (in table order) found
# in the string is replaced everywhere it occurs; no further abbreviations
# are tried for that element. Elements without any match, and NA, are
# returned unchanged.
#
# Args:
#   fecha: character vector (or anything as.character() can convert, such as
#          a factor) of any length, including zero.
#
# Returns:
#   A character vector of the same length as `fecha`.
convertir_fecha <- function(fecha) {
  # Always work on, and return, plain character data.
  fecha <- as.character(fecha)
  pendiente <- !is.na(fecha)

  for (mes in names(meses)) {
    coincide <- pendiente & grepl(mes, fecha, fixed = TRUE)
    if (any(coincide)) {
      fecha[coincide] <- gsub(mes, meses[[mes]], fecha[coincide], fixed = TRUE)
      # Only the first matching month is applied to each element.
      pendiente[coincide] <- FALSE
    }
  }

  fecha
}

# Apply the month conversion to every date. vapply() pins the result to a
# character vector (sapply() may silently return another type), and
# as.character() guards against the dates arriving as a factor.
fechas_numericas <- vapply(
  as.character(independiente),
  convertir_fecha,
  character(1),
  USE.NAMES = FALSE
)

# Parse the "d mm yyyy" strings into Date values.
independiente <- dmy(fechas_numericas)
# Response rescaled by a factor of 1/100.
dependiente <- subdata$Oyentes.Mensuales / 100
view(independiente)
descriptivo(dependiente, independiente)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. Std. Dev 
##   106.02   136.53   158.37   166.92   196.30   228.36    37.68 
## [1] ""
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  dependiente
## D = 0.11881, p-value = 0.02588
## alternative hypothesis: two-sided
## 
## [1] ""

# Fit a simple linear regression of `dependiente` on `independiente` and
# report the usual diagnostics.
#
# Printed, in order: the model summary, the coefficient confidence intervals,
# a ggplot2 scatter plot with the fitted line, a base residuals-vs-fitted
# plot, a Kolmogorov-Smirnov test of the residuals against a normal
# distribution with the residuals' own mean and sd, the Breusch-Pagan test
# (lmtest::bptest) and the Durbin-Watson test (lmtest::dwtest).
#
# Args:
#   dependiente:   response vector.
#   independiente: predictor vector of the same length.
#
# Returns:
#   The fitted `lm` object.
MRLS <- function(dependiente, independiente) {
  library(ggplot2)
  library(lmtest)

  datos <- data.frame(dependiente, independiente)
  modelo_lineal <- lm(dependiente ~ independiente, data = datos)
  print(summary(modelo_lineal))

  # A bare expression inside a function is not auto-printed, so the
  # intervals have to be printed explicitly to show up in the output.
  print(confint(modelo_lineal))

  print(
    ggplot(data = datos, mapping = aes(x = independiente, y = dependiente)) +
      geom_point(color = "firebrick", size = 2) +
      labs(
        title = 'Diagrama de dispersion',
        x = 'Fecha',
        y = 'Oyentes mensuales (*100)'
      ) +
      geom_smooth(method = "lm", se = TRUE, color = "black") +
      theme_bw() +
      theme(plot.title = element_text(hjust = 0.5))
  )

  errores <- residuals(modelo_lineal)

  # Linearity check: residuals against fitted values.
  plot(
    fitted(modelo_lineal), errores,
    main = "Residuos vs Valores Ajustados",
    xlab = "Valores Ajustados",
    ylab = "Residuos",
    pch = 20,
    col = "blue"
  )
  abline(h = 0, col = "red", lwd = 2)

  # Normality check of the residuals.
  print(ks.test(errores, "pnorm", mean(errores), sd(errores)))

  # Homoscedasticity check.
  print(bptest(modelo_lineal))

  # Independence of the errors.
  print(dwtest(modelo_lineal))

  return(modelo_lineal)
}
# Fit the regression, print its diagnostics, and keep the fitted model.
modelo <- MRLS(dependiente = dependiente, independiente = independiente)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Call:
## lm(formula = dependiente ~ independiente, data = datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.882 -13.372  -2.626  12.647  35.729 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.466e+04  6.670e+02  -21.98   <2e-16 ***
## independiente  7.388e-01  3.323e-02   22.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.33 on 152 degrees of freedom
## Multiple R-squared:  0.7648, Adjusted R-squared:  0.7633 
## F-statistic: 494.3 on 1 and 152 DF,  p-value: < 2.2e-16
## `geom_smooth()` using formula = 'y ~ x'

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  errores
## D = 0.075904, p-value = 0.3375
## alternative hypothesis: two-sided
## 
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo_lineal
## BP = 44.093, df = 1, p-value = 3.132e-11
## 
## 
##  Durbin-Watson test
## 
## data:  modelo_lineal
## DW = 0.0090195, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# Plot the observed series, the fitted trend and a short forecast.
#
# The forecast covers `horizonte` consecutive days starting ON the last
# observed date (so the first forecast day coincides with the last
# observation). The shaded band is the confidence interval returned by
# predict(..., interval = "confidence"), i.e. an interval for the fitted
# mean, not a prediction interval for new observations.
#
# Args:
#   dependiente:   observed response values.
#   independiente: observed dates (class Date), same length as `dependiente`.
#   modelo:        fitted model whose predictor is named `independiente`.
#   horizonte:     number of daily forecast points (default 30).
#
# Returns:
#   The ggplot object, returned visibly so a top-level call renders it.
predecir <- function(dependiente, independiente, modelo, horizonte = 30) {
  library(ggplot2)
  library(scales)
  library(dplyr)

  stopifnot(is.numeric(horizonte), length(horizonte) == 1, horizonte >= 1)

  ultima_fecha <- max(independiente, na.rm = TRUE)

  # Daily forecast dates, beginning at the last observed date.
  nuevas_fechas <- seq(from = ultima_fecha, by = "day", length.out = horizonte)
  nuevos_datos <- data.frame(independiente = nuevas_fechas)

  # Point forecast plus confidence limits for each new date.
  predicciones <- predict(modelo, newdata = nuevos_datos, interval = "confidence")

  datos_pred <- data.frame(
    independiente = nuevas_fechas,
    dependiente = predicciones[, "fit"],
    lwr = predicciones[, "lwr"],  # lower limit
    upr = predicciones[, "upr"]   # upper limit
  )

  datos <- data.frame(independiente, dependiente)

  # Model trend evaluated over observed and forecast dates together.
  fechas_todas <- c(datos$independiente, datos_pred$independiente)
  datos_completos <- data.frame(
    independiente = fechas_todas,
    dependiente_modelo = predict(
      modelo,
      newdata = data.frame(independiente = fechas_todas)
    )
  )

  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  ggplot() +
    # Observed series, drawn as a red line.
    geom_line(
      data = datos, aes(x = independiente, y = dependiente),
      color = "firebrick", linewidth = 2
    ) +
    # Model trend in black.
    geom_line(
      data = datos_completos, aes(x = independiente, y = dependiente_modelo),
      color = "black", linewidth = 1
    ) +
    # Forecast, drawn as a green line.
    geom_line(
      data = datos_pred, aes(x = independiente, y = dependiente),
      color = "darkgreen", linewidth = 2
    ) +
    # Confidence band (semi-transparent black).
    geom_ribbon(
      data = datos_pred, aes(x = independiente, ymin = lwr, ymax = upr),
      alpha = 0.2, fill = "black"
    ) +
    labs(
      title = 'Modelo de Regresión con Predicciones',
      x = 'Día',
      y = 'Oyentes Mensuales (Escala log_10(valor)/100)'
    ) +
    # One x-axis label per month.
    scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
    # Log10 y-axis with breaks at powers of ten.
    scale_y_log10(
      breaks = trans_breaks("log10", function(x) 10^x),
      labels = comma_format()
    ) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(size = 10),
      plot.title = element_text(hjust = 0.5)
    )
}
# Draw the observed series, the fitted trend and the forecast.
predecir(
  dependiente = dependiente,
  independiente = independiente,
  modelo = modelo
)
## Warning: package 'scales' was built under R version 4.3.3
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.