Regresión Lineal
Importar la base de datos de csv
Usar file.choose()
# Load the rentals CSV into `data`. The absolute Windows path below points at
# the author's Downloads folder, so it only resolves on that machine.
data <- read.csv(
  file = "C:\\Users\\anavi\\Downloads\\rentadebicis.csv"
)
Entender la base de datos
# Show the compact structure of `data`: dimensions, column names and types.
str(data)
## 'data.frame': 10886 obs. of 14 variables:
## $ hora : int 0 1 2 3 4 5 6 7 8 9 ...
## $ dia : int 1 1 1 1 1 1 1 1 1 1 ...
## $ mes : int 1 1 1 1 1 1 1 1 1 1 ...
## $ año : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ estacion : int 1 1 1 1 1 1 1 1 1 1 ...
## $ dia_de_la_semana : int 6 6 6 6 6 6 6 6 6 6 ...
## $ asueto : int 0 0 0 0 0 0 0 0 0 0 ...
## $ temperatura : num 9.84 9.02 9.02 9.84 9.84 ...
## $ sensacion_termica : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humedad : int 81 80 80 75 75 75 80 86 75 76 ...
## $ velocidad_del_viento : num 0 0 0 0 0 ...
## $ rentas_de_no_registrados: int 3 8 5 3 0 0 2 1 1 8 ...
## $ rentas_de_registrados : int 13 32 27 10 1 1 0 2 7 6 ...
## $ rentas_totales : int 16 40 32 13 1 1 2 3 8 14 ...
# Print per-column summary statistics (min, quartiles, mean, max) for `data`.
summary(data)
## hora dia mes año
## Min. : 0.00 Min. : 1.000 Min. : 1.000 Min. :2011
## 1st Qu.: 6.00 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:2011
## Median :12.00 Median :10.000 Median : 7.000 Median :2012
## Mean :11.54 Mean : 9.993 Mean : 6.521 Mean :2012
## 3rd Qu.:18.00 3rd Qu.:15.000 3rd Qu.:10.000 3rd Qu.:2012
## Max. :23.00 Max. :19.000 Max. :12.000 Max. :2012
## estacion dia_de_la_semana asueto temperatura
## Min. :1.000 Min. :1.000 Min. :0.00000 Min. : 0.82
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:13.94
## Median :3.000 Median :4.000 Median :0.00000 Median :20.50
## Mean :2.507 Mean :4.014 Mean :0.02857 Mean :20.23
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:0.00000 3rd Qu.:26.24
## Max. :4.000 Max. :7.000 Max. :1.00000 Max. :41.00
## sensacion_termica humedad velocidad_del_viento
## Min. : 0.76 Min. : 0.00 Min. : 0.000
## 1st Qu.:16.66 1st Qu.: 47.00 1st Qu.: 7.002
## Median :24.24 Median : 62.00 Median :12.998
## Mean :23.66 Mean : 61.89 Mean :12.799
## 3rd Qu.:31.06 3rd Qu.: 77.00 3rd Qu.:16.998
## Max. :45.45 Max. :100.00 Max. :56.997
## rentas_de_no_registrados rentas_de_registrados rentas_totales
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42.0
## Median : 17.00 Median :118.0 Median :145.0
## Mean : 36.02 Mean :155.6 Mean :191.6
## 3rd Qu.: 49.00 3rd Qu.:222.0 3rd Qu.:284.0
## Max. :367.00 Max. :886.0 Max. :977.0
Generar el Modelo
# Fit a linear model of `rentas_totales`. `hora`, `dia`, `mes` and
# `dia_de_la_semana` are wrapped in factor() so each level gets its own
# coefficient; `sensacion_termica`, `humedad` and `velocidad_del_viento`
# enter as numeric predictors.
regresion <- lm(
  rentas_totales ~ factor(hora) + factor(dia) + factor(mes) +
    factor(dia_de_la_semana) + sensacion_termica + humedad +
    velocidad_del_viento,
  data = data
)
# Print the coefficient table, residual summary and overall fit statistics.
summary(regresion)
##
## Call:
## lm(formula = rentas_totales ~ factor(hora) + factor(dia) + factor(mes) +
## factor(dia_de_la_semana) + sensacion_termica + humedad +
## velocidad_del_viento, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -383.40 -60.38 -10.31 51.73 478.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.79925 10.22263 -1.937 0.052795 .
## factor(hora)1 -16.51526 7.32236 -2.255 0.024125 *
## factor(hora)2 -25.48030 7.34985 -3.467 0.000529 ***
## factor(hora)3 -34.92335 7.41979 -4.707 2.55e-06 ***
## factor(hora)4 -34.49158 7.38813 -4.669 3.07e-06 ***
## factor(hora)5 -18.72468 7.35044 -2.547 0.010866 *
## factor(hora)6 40.11928 7.34166 5.465 4.74e-08 ***
## factor(hora)7 172.99199 7.33212 23.594 < 2e-16 ***
## factor(hora)8 314.72029 7.32121 42.987 < 2e-16 ***
## factor(hora)9 162.82907 7.32459 22.230 < 2e-16 ***
## factor(hora)10 104.11248 7.34751 14.170 < 2e-16 ***
## factor(hora)11 127.23150 7.39009 17.217 < 2e-16 ***
## factor(hora)12 163.55081 7.43684 21.992 < 2e-16 ***
## factor(hora)13 156.63217 7.49251 20.905 < 2e-16 ***
## factor(hora)14 137.45909 7.53321 18.247 < 2e-16 ***
## factor(hora)15 147.26635 7.54263 19.525 < 2e-16 ***
## factor(hora)16 211.44995 7.53173 28.075 < 2e-16 ***
## factor(hora)17 369.65363 7.48935 49.357 < 2e-16 ***
## factor(hora)18 337.78238 7.44675 45.360 < 2e-16 ***
## factor(hora)19 231.40165 7.39079 31.309 < 2e-16 ***
## factor(hora)20 151.45923 7.35602 20.590 < 2e-16 ***
## factor(hora)21 103.36411 7.33145 14.099 < 2e-16 ***
## factor(hora)22 69.60343 7.32048 9.508 < 2e-16 ***
## factor(hora)23 30.76025 7.31390 4.206 2.62e-05 ***
## factor(dia)2 5.08816 6.53343 0.779 0.436121
## factor(dia)3 11.37471 6.54191 1.739 0.082108 .
## factor(dia)4 14.62179 6.53044 2.239 0.025175 *
## factor(dia)5 9.81316 6.52917 1.503 0.132875
## factor(dia)6 15.08638 6.53253 2.309 0.020939 *
## factor(dia)7 3.06089 6.52599 0.469 0.639057
## factor(dia)8 0.59983 6.52119 0.092 0.926714
## factor(dia)9 13.08818 6.53097 2.004 0.045093 *
## factor(dia)10 8.76995 6.54632 1.340 0.180379
## factor(dia)11 14.86020 6.55892 2.266 0.023493 *
## factor(dia)12 12.95519 6.53934 1.981 0.047604 *
## factor(dia)13 12.76125 6.54703 1.949 0.051302 .
## factor(dia)14 10.81383 6.53296 1.655 0.097898 .
## factor(dia)15 17.20653 6.52450 2.637 0.008371 **
## factor(dia)16 12.38359 6.52813 1.897 0.057860 .
## factor(dia)17 28.28428 6.52974 4.332 1.49e-05 ***
## factor(dia)18 9.06000 6.56085 1.381 0.167332
## factor(dia)19 8.87800 6.52419 1.361 0.173611
## factor(mes)2 6.04905 5.27631 1.146 0.251632
## factor(mes)3 20.03139 5.57212 3.595 0.000326 ***
## factor(mes)4 37.67279 5.92180 6.362 2.08e-10 ***
## factor(mes)5 64.29692 6.60758 9.731 < 2e-16 ***
## factor(mes)6 46.85157 7.33684 6.386 1.77e-10 ***
## factor(mes)7 16.20643 8.10290 2.000 0.045517 *
## factor(mes)8 32.70767 7.76753 4.211 2.57e-05 ***
## factor(mes)9 64.86741 7.15146 9.071 < 2e-16 ***
## factor(mes)10 84.66749 6.32237 13.392 < 2e-16 ***
## factor(mes)11 75.57560 5.50487 13.729 < 2e-16 ***
## factor(mes)12 75.02146 5.46779 13.721 < 2e-16 ***
## factor(dia_de_la_semana)2 1.38287 3.99499 0.346 0.729236
## factor(dia_de_la_semana)3 4.89429 3.98057 1.230 0.218894
## factor(dia_de_la_semana)4 5.52725 3.99059 1.385 0.166059
## factor(dia_de_la_semana)5 10.59223 4.01705 2.637 0.008381 **
## factor(dia_de_la_semana)6 12.24981 3.96400 3.090 0.002005 **
## factor(dia_de_la_semana)7 -3.46346 3.96978 -0.872 0.382979
## sensacion_termica 5.82569 0.27138 21.467 < 2e-16 ***
## humedad -1.55369 0.06828 -22.755 < 2e-16 ***
## velocidad_del_viento -0.87306 0.14093 -6.195 6.04e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110.4 on 10824 degrees of freedom
## Multiple R-squared: 0.6309, Adjusted R-squared: 0.6288
## F-statistic: 303.3 on 61 and 10824 DF, p-value: < 2.2e-16
Conclusiones
Modelo altamente significativo (F = 303.3, p < 2.2e-16) con un poder explicativo del
63% (R² = 0.6309; R² ajustado = 0.6288).
Efecto del horario: Picos de rentas en horarios de 8 am y 5-6
pm.
Efecto mensual con fuerte estacionalidad.
LS0tDQp0aXRsZTogInJlZ3Jlc2lvbjFmZWIxNyINCmF1dGhvcjogIkFuYSBWaWN0b3JpYSBWZW5lZ2FzIEEwMTU2NzI0NyINCmRhdGU6ICIyMDI2LTAyLTE3Ig0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IFRSVUUNCiAgICB0b2NfZmxvYXQ6IFRSVUUNCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFDQogICAgdGhlbWU6IGNvc21vDQotLS0NCiMgUmVncmVzacOzbiBMaW5lYWwNCiMgSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcyBkZSBjc3YNCiMgVXNhciBmaWxlLmNob29zZSgpDQpgYGB7cn0NCmRhdGEgPC0gcmVhZC5jc3YoIkM6XFxVc2Vyc1xcYW5hdmlcXERvd25sb2Fkc1xccmVudGFkZWJpY2lzLmNzdiIpDQpgYGANCg0KIyBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zDQpgYGB7cn0NCnN0cihkYXRhKQ0Kc3VtbWFyeShkYXRhKQ0KYGBgDQojIEdlbmVyYXIgZWwgTW9kZWxvDQpgYGB7cn0NCnJlZ3Jlc2lvbiA8LSBsbShyZW50YXNfdG90YWxlc35mYWN0b3IoaG9yYSkrZmFjdG9yKGRpYSkrZmFjdG9yKG1lcykrZmFjdG9yKGRpYV9kZV9sYV9zZW1hbmEpK3NlbnNhY2lvbl90ZXJtaWNhK2h1bWVkYWQrdmVsb2NpZGFkX2RlbF92aWVudG8sIGRhdGE9ZGF0YSkNCnN1bW1hcnkocmVncmVzaW9uKQ0KYGBgDQojIENvbmNsdXNpb25lcw0KIyBNb2RlbG8gYWx0YW1lbnRlIHNpZ25pZmljYXRpdm8gY29uIHVuIHBvZGVyIGV4cGxpY2F0aXZvIGRlbCA2OSUuDQojIEVmZWN0byBkZWwgaG9yYXJpbzogUGljb3MgZGUgcmVudGFzIGVuIGhvcmFyaW9zIGRlIDggYW0geSA1LTYgcG0uDQojIEVmZWN0byBtZW5zdWFsIGNvbiBmdWVydGUgZXN0YWNpb25hbGlkYWQuDQojIENsaW1hIGFmZWN0YSBkZSBmb3JtYSBwb3NpdGl2YSB5IGxhIGh1bWVkYWQgeSB2ZWxvY2lkYWQgZGVsIHZpZW50byBkZSBmb3JtYSBuZWdhdGl2YS4NCmBgYHtyfQ0KZGF0b3NfbnVldm9zIDwtIGRhdGEuZnJhbWUoaG9yYT0xMiwgZGlhPTEsIG1lcz0xOjEyLCBkaWFfZGVfbGFfc2VtYW5hPTEsIHNlbnNhY2lvbl90ZXJtaWNhPTI0LCBodW1lZGFkPTYyLCB2ZWxvY2lkYWRfZGVsX3ZpZW50bz0xMykNCnByZWRpY3QocmVncmVzaW9uLCBkYXRvc19udWV2b3MpDQpgYGANCg0K