data <- read.csv("/Users/marceloreyesp/Desktop/Negocios/6to Semestre LIT/Clase Codigos/Clase Codigo R/rentadebicis.csv")
str(data)
## 'data.frame': 10886 obs. of 14 variables:
## $ hora : int 0 1 2 3 4 5 6 7 8 9 ...
## $ dia : int 1 1 1 1 1 1 1 1 1 1 ...
## $ mes : int 1 1 1 1 1 1 1 1 1 1 ...
## $ año : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ estacion : int 1 1 1 1 1 1 1 1 1 1 ...
## $ dia_de_la_semana : int 6 6 6 6 6 6 6 6 6 6 ...
## $ asueto : int 0 0 0 0 0 0 0 0 0 0 ...
## $ temperatura : num 9.84 9.02 9.02 9.84 9.84 ...
## $ sensacion_termica : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humedad : int 81 80 80 75 75 75 80 86 75 76 ...
## $ velocidad_del_viento : num 0 0 0 0 0 ...
## $ rentas_de_no_registrados: int 3 8 5 3 0 0 2 1 1 8 ...
## $ rentas_de_registrados : int 13 32 27 10 1 1 0 2 7 6 ...
## $ rentas_totales : int 16 40 32 13 1 1 2 3 8 14 ...
summary(data)
## hora dia mes año
## Min. : 0.00 Min. : 1.000 Min. : 1.000 Min. :2011
## 1st Qu.: 6.00 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:2011
## Median :12.00 Median :10.000 Median : 7.000 Median :2012
## Mean :11.54 Mean : 9.993 Mean : 6.521 Mean :2012
## 3rd Qu.:18.00 3rd Qu.:15.000 3rd Qu.:10.000 3rd Qu.:2012
## Max. :23.00 Max. :19.000 Max. :12.000 Max. :2012
## estacion dia_de_la_semana asueto temperatura
## Min. :1.000 Min. :1.000 Min. :0.00000 Min. : 0.82
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:13.94
## Median :3.000 Median :4.000 Median :0.00000 Median :20.50
## Mean :2.507 Mean :4.014 Mean :0.02857 Mean :20.23
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:0.00000 3rd Qu.:26.24
## Max. :4.000 Max. :7.000 Max. :1.00000 Max. :41.00
## sensacion_termica humedad velocidad_del_viento
## Min. : 0.76 Min. : 0.00 Min. : 0.000
## 1st Qu.:16.66 1st Qu.: 47.00 1st Qu.: 7.002
## Median :24.24 Median : 62.00 Median :12.998
## Mean :23.66 Mean : 61.89 Mean :12.799
## 3rd Qu.:31.06 3rd Qu.: 77.00 3rd Qu.:16.998
## Max. :45.45 Max. :100.00 Max. :56.997
## rentas_de_no_registrados rentas_de_registrados rentas_totales
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42.0
## Median : 17.00 Median :118.0 Median :145.0
## Mean : 36.02 Mean :155.6 Mean :191.6
## 3rd Qu.: 49.00 3rd Qu.:222.0 3rd Qu.:284.0
## Max. :367.00 Max. :886.0 Max. :977.0
data$estacion <- as.factor(data$estacion)
data$hora <- as.factor(data$hora)
data$dia_de_la_semana <- as.factor(data$dia_de_la_semana)
data$asueto <- as.factor(data$asueto)
names(Filter(is.factor, data))
## [1] "hora" "estacion" "dia_de_la_semana" "asueto"
modelo_lineal <- lm(rentas_totales ~ temperatura, data = data)
summary(modelo_lineal)
##
## Call:
## lm(formula = rentas_totales ~ temperatura, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -293.32 -112.36 -33.36 78.98 741.44
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0462 4.4394 1.362 0.173
## temperatura 9.1705 0.2048 44.783 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 166.5 on 10884 degrees of freedom
## Multiple R-squared: 0.1556, Adjusted R-squared: 0.1555
## F-statistic: 2006 on 1 and 10884 DF, p-value: < 2.2e-16
modelo_multiple <- lm(rentas_totales ~ humedad + velocidad_del_viento + hora + estacion + dia_de_la_semana, data = data)
summary(modelo_multiple)
##
## Call:
## lm(formula = rentas_totales ~ humedad + velocidad_del_viento +
## hora + estacion + dia_de_la_semana, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -403.63 -61.83 -11.09 52.62 519.17
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 80.24406 8.08889 9.920 < 2e-16 ***
## humedad -1.45147 0.06833 -21.243 < 2e-16 ***
## velocidad_del_viento -1.10935 0.14483 -7.660 2.02e-14 ***
## hora1 -19.31309 7.61797 -2.535 0.011252 *
## hora2 -30.24205 7.64475 -3.956 7.67e-05 ***
## hora3 -40.83087 7.71431 -5.293 1.23e-07 ***
## hora4 -42.79988 7.67805 -5.574 2.54e-08 ***
## hora5 -28.96085 7.63397 -3.794 0.000149 ***
## hora6 29.32168 7.62325 3.846 0.000121 ***
## hora7 164.29050 7.61877 21.564 < 2e-16 ***
## hora8 310.97559 7.61580 40.833 < 2e-16 ***
## hora9 165.31132 7.62043 21.693 < 2e-16 ***
## hora10 112.73593 7.63526 14.765 < 2e-16 ***
## hora11 142.30691 7.65942 18.579 < 2e-16 ***
## hora12 183.79786 7.68379 23.920 < 2e-16 ***
## hora13 181.67158 7.71341 23.553 < 2e-16 ***
## hora14 165.51687 7.73500 21.398 < 2e-16 ***
## hora15 176.07138 7.73928 22.750 < 2e-16 ***
## hora16 239.03832 7.73691 30.896 < 2e-16 ***
## hora17 393.88037 7.71538 51.051 < 2e-16 ***
## hora18 358.59085 7.69107 46.624 < 2e-16 ***
## hora19 247.33762 7.65629 32.305 < 2e-16 ***
## hora20 163.53783 7.63443 21.421 < 2e-16 ***
## hora21 111.66580 7.61913 14.656 < 2e-16 ***
## hora22 74.79344 7.61337 9.824 < 2e-16 ***
## hora23 32.89484 7.60957 4.323 1.55e-05 ***
## estacion2 106.84233 3.13637 34.066 < 2e-16 ***
## estacion3 128.68249 3.18039 40.461 < 2e-16 ***
## estacion4 96.32727 3.19957 30.106 < 2e-16 ***
## dia_de_la_semana2 1.99237 4.13355 0.482 0.629816
## dia_de_la_semana3 3.69224 4.12665 0.895 0.370952
## dia_de_la_semana4 4.90319 4.13118 1.187 0.235304
## dia_de_la_semana5 5.37441 4.14355 1.297 0.194640
## dia_de_la_semana6 8.65941 4.10387 2.110 0.034876 *
## dia_de_la_semana7 -7.47591 4.10634 -1.821 0.068699 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 114.8 on 10851 degrees of freedom
## Multiple R-squared: 0.5994, Adjusted R-squared: 0.5982
## F-statistic: 477.6 on 34 and 10851 DF, p-value: < 2.2e-16
datos_nuevos <- data.frame(
hora=12,
estacion=1,
dia=1,
mes=1:12,
ano=2013,
dia_de_la_semana=1,
sensacion_termica=24,
humedad=62,
velocidad_del_viento=13
)
datos_nuevos$hora <- factor(datos_nuevos$hora, levels = levels(data$hora))
datos_nuevos$estacion <- factor(datos_nuevos$estacion, levels = levels(data$estacion))
datos_nuevos$dia_de_la_semana <- factor(datos_nuevos$dia_de_la_semana, levels = levels(data$dia_de_la_semana))
predict(modelo_multiple, datos_nuevos)
## 1 2 3 4 5 6 7 8
## 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291
## 9 10 11 12
## 159.6291 159.6291 159.6291 159.6291
Modelo altamente significativo con un poder explicativo del 69% Hay picos de rentas en horarios de 8 am y 5-6pm Efecto mensual con fuerte estacionalidad en los meses 10,11 y 12