# Load the dataset from a CSV file picked interactively by the user.
# file.choose() is used instead of choose.files(): the latter is only
# available on Windows builds of R and can return several paths, whereas
# read.csv() needs exactly one file.
data <- read.csv(file.choose())
# Compact overview of the loaded data frame: column names, types, first values.
str(data)
## 'data.frame': 10886 obs. of 14 variables:
## $ hora : int 0 1 2 3 4 5 6 7 8 9 ...
## $ dia : int 1 1 1 1 1 1 1 1 1 1 ...
## $ mes : int 1 1 1 1 1 1 1 1 1 1 ...
## $ año : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ estacion : int 1 1 1 1 1 1 1 1 1 1 ...
## $ dia_de_la_semana : int 6 6 6 6 6 6 6 6 6 6 ...
## $ asueto : int 0 0 0 0 0 0 0 0 0 0 ...
## $ temperatura : num 9.84 9.02 9.02 9.84 9.84 ...
## $ sensacion_termica : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humedad : int 81 80 80 75 75 75 80 86 75 76 ...
## $ velocidad_del_viento : num 0 0 0 0 0 ...
## $ rentas_de_no_registrados: int 3 8 5 3 0 0 2 1 1 8 ...
## $ rentas_de_registrados : int 13 32 27 10 1 1 0 2 7 6 ...
## $ rentas_totales : int 16 40 32 13 1 1 2 3 8 14 ...
# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = data)
## hora dia mes año
## Min. : 0.00 Min. : 1.000 Min. : 1.000 Min. :2011
## 1st Qu.: 6.00 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:2011
## Median :12.00 Median :10.000 Median : 7.000 Median :2012
## Mean :11.54 Mean : 9.993 Mean : 6.521 Mean :2012
## 3rd Qu.:18.00 3rd Qu.:15.000 3rd Qu.:10.000 3rd Qu.:2012
## Max. :23.00 Max. :19.000 Max. :12.000 Max. :2012
## estacion dia_de_la_semana asueto temperatura
## Min. :1.000 Min. :1.000 Min. :0.00000 Min. : 0.82
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:13.94
## Median :3.000 Median :4.000 Median :0.00000 Median :20.50
## Mean :2.507 Mean :4.014 Mean :0.02857 Mean :20.23
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:0.00000 3rd Qu.:26.24
## Max. :4.000 Max. :7.000 Max. :1.00000 Max. :41.00
## sensacion_termica humedad velocidad_del_viento
## Min. : 0.76 Min. : 0.00 Min. : 0.000
## 1st Qu.:16.66 1st Qu.: 47.00 1st Qu.: 7.002
## Median :24.24 Median : 62.00 Median :12.998
## Mean :23.66 Mean : 61.89 Mean :12.799
## 3rd Qu.:31.06 3rd Qu.: 77.00 3rd Qu.:16.998
## Max. :45.45 Max. :100.00 Max. :56.997
## rentas_de_no_registrados rentas_de_registrados rentas_totales
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42.0
## Median : 17.00 Median :118.0 Median :145.0
## Mean : 36.02 Mean :155.6 Mean :191.6
## 3rd Qu.: 49.00 3rd Qu.:222.0 3rd Qu.:284.0
## Max. :367.00 Max. :886.0 Max. :977.0
# Ordinary least-squares fit of total rentals. Hour, day of month, month and
# day of week enter as categorical factors; year, feels-like temperature,
# humidity and wind speed enter as numeric terms.
regresion <- lm(
  formula = rentas_totales ~
    factor(hora) +
    factor(dia) +
    factor(mes) +
    año +
    factor(dia_de_la_semana) +
    sensacion_termica +
    humedad +
    velocidad_del_viento,
  data = data
)
# Coefficient table, residual quantiles and goodness-of-fit statistics.
summary(regresion)
##
## Call:
## lm(formula = rentas_totales ~ factor(hora) + factor(dia) + factor(mes) +
## año + factor(dia_de_la_semana) + sensacion_termica + humedad +
## velocidad_del_viento, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -370.40 -61.37 -6.55 51.96 440.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.762e+05 3.972e+03 -44.364 < 2e-16 ***
## factor(hora)1 -1.764e+01 6.736e+00 -2.618 0.008845 **
## factor(hora)2 -2.765e+01 6.761e+00 -4.089 4.36e-05 ***
## factor(hora)3 -3.855e+01 6.826e+00 -5.648 1.66e-08 ***
## factor(hora)4 -3.908e+01 6.797e+00 -5.749 9.19e-09 ***
## factor(hora)5 -2.282e+01 6.762e+00 -3.375 0.000741 ***
## factor(hora)6 3.597e+01 6.754e+00 5.325 1.03e-07 ***
## factor(hora)7 1.697e+02 6.745e+00 25.156 < 2e-16 ***
## factor(hora)8 3.132e+02 6.735e+00 46.498 < 2e-16 ***
## factor(hora)9 1.636e+02 6.738e+00 24.284 < 2e-16 ***
## factor(hora)10 1.075e+02 6.759e+00 15.899 < 2e-16 ***
## factor(hora)11 1.333e+02 6.800e+00 19.598 < 2e-16 ***
## factor(hora)12 1.718e+02 6.844e+00 25.099 < 2e-16 ***
## factor(hora)13 1.666e+02 6.896e+00 24.165 < 2e-16 ***
## factor(hora)14 1.485e+02 6.934e+00 21.420 < 2e-16 ***
## factor(hora)15 1.586e+02 6.943e+00 22.839 < 2e-16 ***
## factor(hora)16 2.223e+02 6.933e+00 32.064 < 2e-16 ***
## factor(hora)17 3.792e+02 6.893e+00 55.017 < 2e-16 ***
## factor(hora)18 3.461e+02 6.853e+00 50.497 < 2e-16 ***
## factor(hora)19 2.377e+02 6.800e+00 34.956 < 2e-16 ***
## factor(hora)20 1.563e+02 6.768e+00 23.096 < 2e-16 ***
## factor(hora)21 1.067e+02 6.745e+00 15.822 < 2e-16 ***
## factor(hora)22 7.167e+01 6.734e+00 10.642 < 2e-16 ***
## factor(hora)23 3.169e+01 6.728e+00 4.710 2.50e-06 ***
## factor(dia)2 4.661e+00 6.010e+00 0.776 0.438043
## factor(dia)3 1.098e+01 6.018e+00 1.824 0.068110 .
## factor(dia)4 1.455e+01 6.007e+00 2.421 0.015479 *
## factor(dia)5 9.545e+00 6.006e+00 1.589 0.112063
## factor(dia)6 1.350e+01 6.009e+00 2.247 0.024649 *
## factor(dia)7 2.588e+00 6.003e+00 0.431 0.666368
## factor(dia)8 5.630e-02 5.999e+00 0.009 0.992512
## factor(dia)9 1.175e+01 6.008e+00 1.956 0.050487 .
## factor(dia)10 9.053e+00 6.022e+00 1.503 0.132788
## factor(dia)11 1.372e+01 6.034e+00 2.273 0.023018 *
## factor(dia)12 1.172e+01 6.016e+00 1.947 0.051503 .
## factor(dia)13 1.252e+01 6.023e+00 2.079 0.037677 *
## factor(dia)14 1.115e+01 6.010e+00 1.855 0.063574 .
## factor(dia)15 1.767e+01 6.002e+00 2.944 0.003244 **
## factor(dia)16 1.196e+01 6.005e+00 1.991 0.046509 *
## factor(dia)17 2.738e+01 6.007e+00 4.559 5.20e-06 ***
## factor(dia)18 8.182e+00 6.035e+00 1.356 0.175251
## factor(dia)19 9.357e+00 6.002e+00 1.559 0.119009
## factor(mes)2 1.029e+01 4.855e+00 2.119 0.034076 *
## factor(mes)3 3.055e+01 5.131e+00 5.954 2.70e-09 ***
## factor(mes)4 5.394e+01 5.460e+00 9.879 < 2e-16 ***
## factor(mes)5 8.399e+01 6.095e+00 13.781 < 2e-16 ***
## factor(mes)6 7.555e+01 6.780e+00 11.143 < 2e-16 ***
## factor(mes)7 5.105e+01 7.495e+00 6.811 1.02e-11 ***
## factor(mes)8 6.345e+01 7.179e+00 8.838 < 2e-16 ***
## factor(mes)9 8.809e+01 6.600e+00 13.349 < 2e-16 ***
## factor(mes)10 1.013e+02 5.828e+00 17.385 < 2e-16 ***
## factor(mes)11 8.487e+01 5.068e+00 16.745 < 2e-16 ***
## factor(mes)12 8.084e+01 5.032e+00 16.066 < 2e-16 ***
## año 8.760e+01 1.975e+00 44.359 < 2e-16 ***
## factor(dia_de_la_semana)2 1.931e+00 3.675e+00 0.526 0.599243
## factor(dia_de_la_semana)3 4.029e+00 3.662e+00 1.100 0.271250
## factor(dia_de_la_semana)4 6.044e+00 3.671e+00 1.646 0.099719 .
## factor(dia_de_la_semana)5 1.158e+01 3.695e+00 3.135 0.001725 **
## factor(dia_de_la_semana)6 1.360e+01 3.647e+00 3.730 0.000193 ***
## factor(dia_de_la_semana)7 -5.067e+00 3.652e+00 -1.387 0.165368
## sensacion_termica 4.347e+00 2.519e-01 17.261 < 2e-16 ***
## humedad -1.276e+00 6.312e-02 -20.217 < 2e-16 ***
## velocidad_del_viento -7.129e-01 1.297e-01 -5.497 3.95e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 101.5 on 10823 degrees of freedom
## Multiple R-squared: 0.6877, Adjusted R-squared: 0.6859
## F-statistic: 384.4 on 62 and 10823 DF, p-value: < 2.2e-16
# Prediction grid: one row per month (1 to 12); every other predictor is a
# scalar that data.frame() recycles across the twelve rows.
# NOTE(review): año = 2013 lies outside the 2011-2012 range reported by
# summary(data), so these predictions extrapolate the linear year trend.
datos_nuevos <- data.frame(
  hora = 12,
  dia = 1,
  mes = 1:12,
  año = 2013,
  dia_de_la_semana = 1,
  sensacion_termica = 24,
  humedad = 62,
  velocidad_del_viento = 13
)
# Predicted total rentals for each of the twelve rows of the grid.
predict(object = regresion, newdata = datos_nuevos)
## 1 2 3 4 5 6 7 8
## 295.4226 305.7118 325.9737 349.3610 379.4141 370.9742 346.4712 358.8736
## 9 10 11 12
## 383.5170 396.7470 380.2941 376.2586
El modelo es altamente significativo (F = 384.4, p < 2.2e-16) y tiene un poder explicativo de aproximadamente el 69% (R² = 0.6877).
Hay picos de rentas a las 8 am y entre las 5 y las 6 pm (los coeficientes más altos de factor(hora) corresponden a las horas 8, 17 y 18).
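Efecto mensual con fuerte estacionalidad: respecto a enero, los coeficientes más altos se concentran en los meses 9, 10, 11 y 12 (máximo en el mes 10), aunque el mes 5 también es elevado.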