library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# datos ----
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the caller's objects without detaching packages or resetting
# options, so it does not give a clean session. Restart R for that instead.

# Read the raw CSV; the path is relative to the current working directory.
dataset <- read.csv("Oyentes_Mensuales_de_Spotify_OMAY.csv")

# Data viewers are only opened in an interactive session, so the script can
# also run non-interactively (Rscript, knitting) without failing here.
if (interactive()) {
  View(dataset)
}

# Keep rows 151 through the last row and the first three columns.
# Guard first: with fewer than 151 rows, 151:nrow(dataset) would count
# backwards and index rows that do not exist, silently producing NA rows.
stopifnot(nrow(dataset) >= 151, ncol(dataset) >= 3)
subdata <- dataset[151:nrow(dataset), 1:3]

if (interactive()) {
  view(subdata)
}
# Descriptive overview of a response variable against a date/time variable.
#
# Prints the five-number summary plus mean and standard deviation of
# `dependiente`, prints a one-sample Kolmogorov-Smirnov test of `dependiente`
# against a normal distribution whose mean and sd are estimated from the same
# data, draws a base-graphics boxplot, and returns a ggplot scatter plot.
#
# Args:
#   dependiente:   numeric vector summarised, tested and plotted on the y axis.
#   independiente: vector of the same length, plotted on the x axis.
#
# Returns:
#   A ggplot object (scatter of `dependiente` vs `independiente`). It is the
#   last expression of the function, so it is only drawn when the caller
#   prints it (which happens automatically for a top-level call).
descriptivo <- function(dependiente, independiente) {
  library(ggplot2)
  # NOTE(review): nothing in this function calls car; the attach is kept only
  # so that code relying on car being loaded after this call keeps working.
  library(car)

  # summary() result extended with the standard deviation as an extra entry.
  sum1 <- summary(dependiente)
  sum1["Std. Dev"] <- sd(dependiente)
  print(sum1)
  print("")

  # Normality check: KS test against N(mean, sd) estimated from the sample.
  print(ks.test(dependiente, "pnorm", mean(dependiente), sd(dependiente)))
  print("")

  boxplot(
    dependiente,
    names = c("Oyentes mensuales"),
    col = c("skyblue"),
    main = "Distribución de las variables del modelo"
  )

  data <- data.frame(Día = independiente, Oyentes = dependiente)

  # Scatter plot of the response over time; x tick labels are rotated 45
  # degrees so they do not overlap.
  ggplot(data, aes(x = Día, y = Oyentes)) +
    geom_point(color = "red", size = 2, alpha = 0.6) +
    labs(
      title = "Gráfico de Dispersión: Oyentes vs. Día",
      x = "Fecha",
      y = "Oyentes Mensuales (*100)"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
# Store the date column as a factor, then parse it (day-month-year order)
# into the explanatory variable.
subdata$Fecha <- as.factor(subdata$Fecha)
independiente <- dmy(subdata$Fecha)

# Response variable: monthly listeners divided by 100.
dependiente <- subdata$Oyentes.Mensuales / 100

view(independiente)

# Number of missing values in each variable.
sum(is.na(independiente))
## [1] 0
sum(is.na(dependiente))
## [1] 0

# Summary statistics, normality test and exploratory plots.
descriptivo(dependiente, independiente)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Min. 1st Qu. Median Mean 3rd Qu. Max. Std. Dev
## 785.3 846.7 1031.0 1103.6 1307.3 1671.2 281.0
## [1] ""
## Warning in ks.test.default(dependiente, "pnorm", mean(dependiente),
## sd(dependiente)): ties should not be present for the Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: dependiente
## D = 0.16658, p-value = 0.356
## alternative hypothesis: two-sided
##
## [1] ""


# Fit a simple linear regression of `dependiente` on `independiente` and
# report the model together with its usual assumption checks.
#
# Printed, in order: the model summary, the coefficient confidence intervals,
# a scatter plot with the fitted line, a residuals-vs-fitted plot, a
# Kolmogorov-Smirnov test of the residuals against a normal distribution
# (mean and sd estimated from the residuals), the Breusch-Pagan test and the
# Durbin-Watson test.
#
# Args:
#   dependiente:   numeric response vector.
#   independiente: explanatory vector of the same length.
#
# Returns:
#   The fitted `lm` object. Its terms are named `dependiente` and
#   `independiente`, so `newdata` passed to predict() must use the column
#   name `independiente`.
MRLS <- function(dependiente, independiente) {
  library(ggplot2)
  library(lmtest)

  datos <- data.frame(dependiente, independiente)
  modelo_lineal <- lm(dependiente ~ independiente, datos)

  print(summary(modelo_lineal))
  # Must be printed explicitly: a bare expression inside a function body is
  # evaluated and discarded, so the intervals would never be shown.
  print(confint(modelo_lineal))

  # Scatter plot with the least-squares line and its confidence band.
  print(
    ggplot(data = datos, mapping = aes(x = independiente, y = dependiente)) +
      geom_point(color = "firebrick", size = 2) +
      labs(
        title = "Diagrama de dispersion",
        x = "Fecha",
        y = "Oyentes mensuales (*100)"
      ) +
      geom_smooth(method = "lm", se = TRUE, color = "black") +
      theme_bw() +
      theme(plot.title = element_text(hjust = 0.5))
  )

  errores <- residuals(modelo_lineal)

  # Linearity check: residuals against fitted values with a zero reference
  # line.
  plot(
    fitted(modelo_lineal), errores,
    main = "Residuos vs Valores Ajustados",
    xlab = "Valores Ajustados",
    ylab = "Residuos",
    pch = 20,
    col = "blue"
  )
  abline(h = 0, col = "red", lwd = 2)

  # Normality check of the residuals.
  print(ks.test(errores, "pnorm", mean(errores), sd(errores)))

  # Homoscedasticity check (Breusch-Pagan).
  print(bptest(modelo_lineal))

  # Independence-of-errors check (Durbin-Watson).
  print(dwtest(modelo_lineal))

  modelo_lineal
}
# Fit the regression, print its diagnostics, and keep the fitted model.
modelo <- MRLS(dependiente = dependiente, independiente = independiente)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Call:
## lm(formula = dependiente ~ independiente, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -173.92 -70.54 -13.28 62.89 254.48
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.803e+05 4.117e+04 -14.10 1.65e-14 ***
## independiente 2.888e+01 2.045e+00 14.12 1.57e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 101.8 on 29 degrees of freedom
## Multiple R-squared: 0.873, Adjusted R-squared: 0.8687
## F-statistic: 199.4 on 1 and 29 DF, p-value: 1.575e-14
## `geom_smooth()` using formula = 'y ~ x'


##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: errores
## D = 0.1741, p-value = 0.2714
## alternative hypothesis: two-sided
##
##
## studentized Breusch-Pagan test
##
## data: modelo_lineal
## BP = 3.15, df = 1, p-value = 0.07593
##
##
## Durbin-Watson test
##
## data: modelo_lineal
## DW = 0.14476, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# Extend a fitted linear trend 30 days past the last observation and plot it
# together with the observed series.
#
# Args:
#   dependiente:   numeric vector of observed values.
#   independiente: Date vector of the same length (the x axis uses
#                  scale_x_date()).
#   modelo:        fitted model accepted by predict(); it must have been fit
#                  with an explanatory variable named `independiente`, because
#                  that is the column name used in `newdata`.
#
# Returns:
#   A ggplot object with four layers: observed series, fitted trend over the
#   observed plus forecast dates, forecast, and the forecast confidence band.
#   The y axis is log10-scaled.
predecir <- function(dependiente, independiente, modelo) {
  library(ggplot2)
  library(scales)
  library(dplyr)

  ultima_fecha <- max(independiente, na.rm = TRUE)

  # 30 consecutive daily dates; the first one IS the last observed date, so
  # the forecast line starts where the observed series ends.
  nuevas_fechas <- seq(from = ultima_fecha, by = "day", length.out = 30)

  # predict() looks the regressor up by name, hence the column name.
  nuevos_datos <- data.frame(independiente = nuevas_fechas)
  predicciones <- predict(modelo, newdata = nuevos_datos,
                          interval = "confidence")

  # Forecast with the lower/upper bounds of the confidence interval.
  datos_pred <- data.frame(
    independiente = nuevas_fechas,
    dependiente = predicciones[, "fit"],
    lwr = predicciones[, "lwr"],
    upr = predicciones[, "upr"]
  )

  datos <- data.frame(independiente, dependiente)

  # Fitted trend evaluated on the observed dates followed by the forecast
  # dates.
  fechas_completas <- c(datos$independiente, datos_pred$independiente)
  datos_completos <- data.frame(
    independiente = fechas_completas,
    dependiente_modelo = predict(
      modelo,
      newdata = data.frame(independiente = fechas_completas)
    )
  )

  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  ggplot() +
    # Observed series (red line).
    geom_line(
      data = datos, aes(x = independiente, y = dependiente),
      color = "firebrick", linewidth = 2
    ) +
    # Model trend (black line).
    geom_line(
      data = datos_completos, aes(x = independiente, y = dependiente_modelo),
      color = "black", linewidth = 1
    ) +
    # Forecast (green line).
    geom_line(
      data = datos_pred, aes(x = independiente, y = dependiente),
      color = "darkgreen", linewidth = 2
    ) +
    # Confidence band of the forecast (translucent black).
    geom_ribbon(
      data = datos_pred, aes(x = independiente, ymin = lwr, ymax = upr),
      alpha = 0.2, fill = "black"
    ) +
    labs(
      title = "Modelo de Regresión con Predicciones",
      x = "Día",
      y = "Oyentes Mensuales (Escala log_10(valor)/100)"
    ) +
    # One x tick per week, labelled with abbreviated month and year.
    scale_x_date(date_breaks = "1 week", date_labels = "%b %Y") +
    # Log10 y axis with comma-formatted tick labels.
    scale_y_log10(
      breaks = trans_breaks("log10", function(x) 10^x),
      labels = comma_format()
    ) +
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(size = 10),
      plot.title = element_text(hjust = 0.5)
    )
}
# Build the forecast plot; the returned ggplot is drawn because a top-level
# call auto-prints its value.
predecir(
  dependiente = dependiente,
  independiente = independiente,
  modelo = modelo
)
## Warning: package 'scales' was built under R version 4.3.3
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
