library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Data ----
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# global environment from inside a script destroys whatever the caller had
# loaded and does not give a clean session anyway (packages stay attached).
dataset <- read.csv("Oyentes_Mensuales_de_Spotify_LUIS.csv")
# The data viewer is only opened in an interactive session, so the script can
# also run in batch mode.
if (interactive()) {
  View(dataset)
}
# Analysis window: rows from `fila_inicial` to the end, first three columns.
fila_inicial <- 209L
# Guard the slice: with fewer rows, fila_inicial:nrow(dataset) would count
# backwards and silently produce NA rows.
stopifnot(nrow(dataset) >= fila_inicial, ncol(dataset) >= 3L)
subdata <- dataset[seq(fila_inicial, nrow(dataset)), 1:3]
if (interactive()) {
  view(subdata)
}
#' Descriptive overview of a numeric series observed over dates.
#'
#' Prints the summary statistics of `dependiente` (with the standard deviation
#' appended), prints a one-sample Kolmogorov-Smirnov test against a normal
#' distribution, draws a base-graphics boxplot, and returns a scatter plot of
#' the series against the dates.
#'
#' @param dependiente Numeric vector with the observed values.
#' @param independiente `Date` vector of the same length as `dependiente`.
#' @return A ggplot object (scatter plot); it is drawn when printed.
descriptivo <- function(dependiente, independiente) {
  # Fail early: scale_x_date() below only accepts Date input, and a mismatch
  # would otherwise surface only when the returned plot is rendered.
  stopifnot(
    is.numeric(dependiente),
    inherits(independiente, "Date"),
    length(dependiente) == length(independiente),
    length(dependiente) > 0L
  )
  library(ggplot2)
  # NOTE(review): nothing in this function uses car; the attach is kept only
  # because code outside this function may rely on it - confirm and remove.
  library(car)

  resumen <- summary(dependiente)
  resumen["Std. Dev"] <- sd(dependiente)
  print(resumen)
  print("")
  # NOTE(review): the normal parameters are estimated from the same sample
  # being tested, so the reported p-value should be read as approximate.
  print(ks.test(dependiente, "pnorm", mean(dependiente), sd(dependiente)))
  print("")
  boxplot(dependiente, names = c("Oyentes mensuales"), col = c("skyblue"),
          main = "Distribución de las variables del modelo")
  data <- data.frame(Día = independiente, Oyentes = dependiente)
  # Scatter plot of the series against time; being the last expression it is
  # the function's (visible) return value.
  ggplot(data, aes(x = Día, y = Oyentes)) +
    geom_point(color = "red", size = 2, alpha = 0.6) +
    labs(title = "Gráfico de Dispersión: Oyentes vs. Día",
         x = "Fecha",
         y = "Oyentes Mensuales (*100)") +
    scale_x_date(date_breaks = "2 week", date_labels = "%b %Y") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
library(lubridate)
# Use Spanish for date/time formatting; at top level the value returned by
# Sys.setlocale() is auto-printed.
Sys.setlocale(category = "LC_TIME", locale = "es_ES.UTF-8")
## [1] "es_ES.UTF-8"
independiente <- as.factor(subdata$Fecha)
# Lookup table: Spanish month abbreviation -> two-digit month number.
meses <- setNames(
  sprintf("%02d", 1:12),
  c("ene", "feb", "mar", "abr", "may", "jun",
    "jul", "ago", "sep", "oct", "nov", "dic")
)
#' Replace a Spanish month abbreviation by its month number.
#'
#' Turns dates written as "d mes yyyy" (e.g. "5 ene 2024") into "5 01 2024".
#' Only the first abbreviation of `tabla_meses` found in each element is
#' replaced. Matching ignores case and requires the abbreviation not to be
#' part of a longer word, so e.g. "marzo" is left untouched instead of being
#' mangled into "03zo".
#'
#' @param fecha Character vector or factor of date labels; may contain `NA`.
#' @param tabla_meses Named character vector mapping abbreviation -> month
#'   number. Defaults to the script-level `meses` table.
#' @return A character vector of the same length as `fecha`; elements with no
#'   recognised abbreviation (and `NA`s) are returned unchanged.
convertir_fecha <- function(fecha, tabla_meses = meses) {
  # Always work on (and return) character data, so the result type does not
  # depend on whether a month was found.
  texto <- as.character(fecha)
  pendiente <- !is.na(texto)
  for (mes in names(tabla_meses)) {
    if (!any(pendiente)) {
      break
    }
    # Lookarounds keep the abbreviation from matching inside a longer word.
    patron <- paste0("(?<![[:alpha:]])", mes, "(?![[:alpha:]])")
    coincide <- pendiente &
      grepl(patron, texto, ignore.case = TRUE, perl = TRUE)
    texto[coincide] <- gsub(patron, tabla_meses[[mes]], texto[coincide],
                            ignore.case = TRUE, perl = TRUE)
    # Once an element has been converted it is not touched again.
    pendiente <- pendiente & !coincide
  }
  texto
}
# Apply the month conversion to every date label. The factor is turned into
# character first and vapply() pins the result to one string per label, so an
# unexpected value raises an error instead of silently changing the result
# type (as sapply() would).
fechas_numericas <- vapply(
  as.character(independiente), convertir_fecha, character(1),
  USE.NAMES = FALSE
)
independiente <- dmy(fechas_numericas)
# Monthly listeners scaled down by 100.
dependiente <- subdata$Oyentes.Mensuales / 100
if (interactive()) {
  view(independiente)
}
descriptivo(dependiente, independiente)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Min. 1st Qu. Median Mean 3rd Qu. Max. Std. Dev
## 106.02 136.53 158.37 166.92 196.30 228.36 37.68
## [1] ""
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: dependiente
## D = 0.11881, p-value = 0.02588
## alternative hypothesis: two-sided
##
## [1] ""


#' Fit and diagnose a simple linear regression of `dependiente` on
#' `independiente`.
#'
#' Prints the model summary and the confidence intervals of the coefficients,
#' draws the scatter plot with the fitted line and the residuals-vs-fitted
#' plot, and prints three residual checks: Kolmogorov-Smirnov (normality),
#' Breusch-Pagan (homoscedasticity) and Durbin-Watson (independence).
#'
#' @param dependiente Numeric response vector.
#' @param independiente Predictor vector of the same length.
#' @return The fitted `lm` object. Its terms are named `dependiente` and
#'   `independiente`, so `newdata` for `predict()` must use those names.
MRLS <- function(dependiente, independiente) {
  library(ggplot2)
  library(lmtest)
  datos <- data.frame(dependiente, independiente)
  modelo_lineal <- lm(dependiente ~ independiente, datos)
  print(summary(modelo_lineal))
  # Inside a function a bare confint() call is evaluated and thrown away; it
  # has to be printed explicitly for the intervals to be shown.
  print(confint(modelo_lineal))
  print(ggplot(data = datos, mapping = aes(x = independiente, y = dependiente)) +
    geom_point(color = "firebrick", size = 2) +
    labs(title = 'Diagrama de dispersion', x = 'Fecha', y = 'Oyentes mensuales (*100)') +
    geom_smooth(method = "lm", se = TRUE, color = "black") +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5)))
  errores <- modelo_lineal$residuals
  # Linearity check: residuals against fitted values.
  plot(fitted(modelo_lineal), errores,
       main = "Residuos vs Valores Ajustados",
       xlab = "Valores Ajustados",
       ylab = "Residuos",
       pch = 20,
       col = "blue")
  abline(h = 0, col = "red", lwd = 2)
  # Normality check on the residuals.
  print(ks.test(errores, "pnorm", mean(errores), sd(errores)))
  # Homoscedasticity check.
  print(bptest(modelo_lineal))
  # Independence (autocorrelation) check on the errors.
  print(dwtest(modelo_lineal))
  modelo_lineal
}
# Fit the regression on the prepared series and keep the model for later use.
modelo <- MRLS(dependiente = dependiente, independiente = independiente)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Call:
## lm(formula = dependiente ~ independiente, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.882 -13.372 -2.626 12.647 35.729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.466e+04 6.670e+02 -21.98 <2e-16 ***
## independiente 7.388e-01 3.323e-02 22.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.33 on 152 degrees of freedom
## Multiple R-squared: 0.7648, Adjusted R-squared: 0.7633
## F-statistic: 494.3 on 1 and 152 DF, p-value: < 2.2e-16
## `geom_smooth()` using formula = 'y ~ x'


##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: errores
## D = 0.075904, p-value = 0.3375
## alternative hypothesis: two-sided
##
##
## studentized Breusch-Pagan test
##
## data: modelo_lineal
## BP = 44.093, df = 1, p-value = 3.132e-11
##
##
## Durbin-Watson test
##
## data: modelo_lineal
## DW = 0.0090195, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
#' Plot the observed series, the fitted trend and a short forecast.
#'
#' Uses `modelo` to predict `horizonte` consecutive days. The sequence starts
#' ON the last observed date (so the forecast joins the observed series) and
#' the predictions come with their confidence interval. The y axis is drawn
#' on a log10 scale.
#'
#' @param dependiente Numeric vector with the observed values.
#' @param independiente `Date` vector of the same length as `dependiente`.
#' @param modelo Fitted model whose predictor is named `independiente`
#'   (e.g. the object returned by `MRLS()`).
#' @param horizonte Number of daily dates to predict, counting the last
#'   observed date. Defaults to 30.
#' @return A ggplot object with four layers: observed series, model trend,
#'   forecast line and confidence ribbon.
predecir <- function(dependiente, independiente, modelo, horizonte = 30) {
  stopifnot(
    inherits(independiente, "Date"),
    length(dependiente) == length(independiente),
    is.numeric(horizonte), length(horizonte) == 1L, horizonte >= 1
  )
  library(ggplot2)
  library(scales)

  ultima_fecha <- max(independiente, na.rm = TRUE)
  # Daily dates to predict, beginning at the last observed date.
  nuevas_fechas <- seq(from = ultima_fecha, by = "day", length.out = horizonte)
  nuevos_datos <- data.frame(independiente = nuevas_fechas)
  # Point predictions with confidence limits (columns fit / lwr / upr).
  predicciones <- predict(modelo, newdata = nuevos_datos, interval = "confidence")
  datos_pred <- data.frame(
    independiente = nuevas_fechas,
    dependiente = predicciones[, "fit"],
    lwr = predicciones[, "lwr"], # lower limit
    upr = predicciones[, "upr"]  # upper limit
  )
  datos <- data.frame(independiente, dependiente)
  # Model trend over the observed and the predicted dates together.
  fechas_completas <- c(datos$independiente, datos_pred$independiente)
  datos_completos <- data.frame(
    independiente = fechas_completas,
    dependiente_modelo = predict(
      modelo,
      newdata = data.frame(independiente = fechas_completas)
    )
  )
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0.
  ggplot() +
    # Observed series (red line)
    geom_line(data = datos, aes(x = independiente, y = dependiente),
              color = "firebrick", linewidth = 2) +
    # Model trend (black line)
    geom_line(data = datos_completos, aes(x = independiente, y = dependiente_modelo),
              color = "black", linewidth = 1) +
    # Forecast (green line)
    geom_line(data = datos_pred, aes(x = independiente, y = dependiente),
              color = "darkgreen", linewidth = 2) +
    # Confidence interval of the forecast (translucent black band)
    geom_ribbon(data = datos_pred, aes(x = independiente, ymin = lwr, ymax = upr),
                alpha = 0.2, fill = "black") +
    # Labels
    labs(title = 'Modelo de Regresión con Predicciones',
         x = 'Día',
         y = 'Oyentes Mensuales (Escala log_10(valor)/100)') +
    # One x-axis label per month
    scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
    # Log10 y axis with comma-formatted labels
    scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                  labels = comma_format()) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text.y = element_text(size = 10),
          plot.title = element_text(hjust = 0.5))
}
# Draw the forecast plot (the returned ggplot is auto-printed at top level).
predecir(dependiente = dependiente, independiente = independiente, modelo = modelo)
## Warning: package 'scales' was built under R version 4.3.3
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
