Regresión Lineal

library(readr)
# Load the bike-rental dataset from the working directory.
# Author's reminder: file.choose() can be used to pick the file interactively.
data <- read_csv(file = "rentadebicis.csv")
## Rows: 10886 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (14): hora, dia, mes, año, estacion, dia_de_la_semana, asueto, temperatu...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Entender la data

# Inspect the structure of the dataset: dimensions, column names and types.
str(data)
## spc_tbl_ [10,886 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ hora                    : num [1:10886] 0 1 2 3 4 5 6 7 8 9 ...
##  $ dia                     : num [1:10886] 1 1 1 1 1 1 1 1 1 1 ...
##  $ mes                     : num [1:10886] 1 1 1 1 1 1 1 1 1 1 ...
##  $ año                     : num [1:10886] 2011 2011 2011 2011 2011 ...
##  $ estacion                : num [1:10886] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dia_de_la_semana        : num [1:10886] 6 6 6 6 6 6 6 6 6 6 ...
##  $ asueto                  : num [1:10886] 0 0 0 0 0 0 0 0 0 0 ...
##  $ temperatura             : num [1:10886] 9.84 9.02 9.02 9.84 9.84 ...
##  $ sensacion_termica       : num [1:10886] 14.4 13.6 13.6 14.4 14.4 ...
##  $ humedad                 : num [1:10886] 81 80 80 75 75 75 80 86 75 76 ...
##  $ velocidad_del_viento    : num [1:10886] 0 0 0 0 0 ...
##  $ rentas_de_no_registrados: num [1:10886] 3 8 5 3 0 0 2 1 1 8 ...
##  $ rentas_de_registrados   : num [1:10886] 13 32 27 10 1 1 0 2 7 6 ...
##  $ rentas_totales          : num [1:10886] 16 40 32 13 1 1 2 3 8 14 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   hora = col_double(),
##   ..   dia = col_double(),
##   ..   mes = col_double(),
##   ..   año = col_double(),
##   ..   estacion = col_double(),
##   ..   dia_de_la_semana = col_double(),
##   ..   asueto = col_double(),
##   ..   temperatura = col_double(),
##   ..   sensacion_termica = col_double(),
##   ..   humedad = col_double(),
##   ..   velocidad_del_viento = col_double(),
##   ..   rentas_de_no_registrados = col_double(),
##   ..   rentas_de_registrados = col_double(),
##   ..   rentas_totales = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary statistics (min, quartiles, mean, max).
summary(data)
##       hora            dia              mes              año      
##  Min.   : 0.00   Min.   : 1.000   Min.   : 1.000   Min.   :2011  
##  1st Qu.: 6.00   1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:2011  
##  Median :12.00   Median :10.000   Median : 7.000   Median :2012  
##  Mean   :11.54   Mean   : 9.993   Mean   : 6.521   Mean   :2012  
##  3rd Qu.:18.00   3rd Qu.:15.000   3rd Qu.:10.000   3rd Qu.:2012  
##  Max.   :23.00   Max.   :19.000   Max.   :12.000   Max.   :2012  
##     estacion     dia_de_la_semana     asueto         temperatura   
##  Min.   :1.000   Min.   :1.000    Min.   :0.00000   Min.   : 0.82  
##  1st Qu.:2.000   1st Qu.:2.000    1st Qu.:0.00000   1st Qu.:13.94  
##  Median :3.000   Median :4.000    Median :0.00000   Median :20.50  
##  Mean   :2.507   Mean   :4.014    Mean   :0.02857   Mean   :20.23  
##  3rd Qu.:4.000   3rd Qu.:6.000    3rd Qu.:0.00000   3rd Qu.:26.24  
##  Max.   :4.000   Max.   :7.000    Max.   :1.00000   Max.   :41.00  
##  sensacion_termica    humedad       velocidad_del_viento
##  Min.   : 0.76     Min.   :  0.00   Min.   : 0.000      
##  1st Qu.:16.66     1st Qu.: 47.00   1st Qu.: 7.002      
##  Median :24.24     Median : 62.00   Median :12.998      
##  Mean   :23.66     Mean   : 61.89   Mean   :12.799      
##  3rd Qu.:31.06     3rd Qu.: 77.00   3rd Qu.:16.998      
##  Max.   :45.45     Max.   :100.00   Max.   :56.997      
##  rentas_de_no_registrados rentas_de_registrados rentas_totales 
##  Min.   :  0.00           Min.   :  0.0         Min.   :  1.0  
##  1st Qu.:  4.00           1st Qu.: 36.0         1st Qu.: 42.0  
##  Median : 17.00           Median :118.0         Median :145.0  
##  Mean   : 36.02           Mean   :155.6         Mean   :191.6  
##  3rd Qu.: 49.00           3rd Qu.:222.0         3rd Qu.:284.0  
##  Max.   :367.00           Max.   :886.0         Max.   :977.0
# Preview the first rows of the dataset.
head(data)
## # A tibble: 6 × 14
##    hora   dia   mes   año estacion dia_de_la_semana asueto temperatura
##   <dbl> <dbl> <dbl> <dbl>    <dbl>            <dbl>  <dbl>       <dbl>
## 1     0     1     1  2011        1                6      0        9.84
## 2     1     1     1  2011        1                6      0        9.02
## 3     2     1     1  2011        1                6      0        9.02
## 4     3     1     1  2011        1                6      0        9.84
## 5     4     1     1  2011        1                6      0        9.84
## 6     5     1     1  2011        1                6      0        9.84
## # ℹ 6 more variables: sensacion_termica <dbl>, humedad <dbl>,
## #   velocidad_del_viento <dbl>, rentas_de_no_registrados <dbl>,
## #   rentas_de_registrados <dbl>, rentas_totales <dbl>

Generar el modelo

# Main model: total rentals explained by time and weather predictors.
# Hour, day of month, month and weekday are wrapped in factor() so each level
# gets its own coefficient; year and the weather variables enter as numeric.
regresion <- lm(
  rentas_totales ~ factor(hora) + factor(dia) + factor(mes) + año +
    factor(dia_de_la_semana) + sensacion_termica + humedad +
    velocidad_del_viento,
  data = data
)
summary(regresion)
## 
## Call:
## lm(formula = rentas_totales ~ factor(hora) + factor(dia) + factor(mes) + 
##     año + factor(dia_de_la_semana) + sensacion_termica + humedad + 
##     velocidad_del_viento, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -370.40  -61.37   -6.55   51.96  440.24 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -1.762e+05  3.972e+03 -44.364  < 2e-16 ***
## factor(hora)1             -1.764e+01  6.736e+00  -2.618 0.008845 ** 
## factor(hora)2             -2.765e+01  6.761e+00  -4.089 4.36e-05 ***
## factor(hora)3             -3.855e+01  6.826e+00  -5.648 1.66e-08 ***
## factor(hora)4             -3.908e+01  6.797e+00  -5.749 9.19e-09 ***
## factor(hora)5             -2.282e+01  6.762e+00  -3.375 0.000741 ***
## factor(hora)6              3.597e+01  6.754e+00   5.325 1.03e-07 ***
## factor(hora)7              1.697e+02  6.745e+00  25.156  < 2e-16 ***
## factor(hora)8              3.132e+02  6.735e+00  46.498  < 2e-16 ***
## factor(hora)9              1.636e+02  6.738e+00  24.284  < 2e-16 ***
## factor(hora)10             1.075e+02  6.759e+00  15.899  < 2e-16 ***
## factor(hora)11             1.333e+02  6.800e+00  19.598  < 2e-16 ***
## factor(hora)12             1.718e+02  6.844e+00  25.099  < 2e-16 ***
## factor(hora)13             1.666e+02  6.896e+00  24.165  < 2e-16 ***
## factor(hora)14             1.485e+02  6.934e+00  21.420  < 2e-16 ***
## factor(hora)15             1.586e+02  6.943e+00  22.839  < 2e-16 ***
## factor(hora)16             2.223e+02  6.933e+00  32.064  < 2e-16 ***
## factor(hora)17             3.792e+02  6.893e+00  55.017  < 2e-16 ***
## factor(hora)18             3.461e+02  6.853e+00  50.497  < 2e-16 ***
## factor(hora)19             2.377e+02  6.800e+00  34.956  < 2e-16 ***
## factor(hora)20             1.563e+02  6.768e+00  23.096  < 2e-16 ***
## factor(hora)21             1.067e+02  6.745e+00  15.822  < 2e-16 ***
## factor(hora)22             7.167e+01  6.734e+00  10.642  < 2e-16 ***
## factor(hora)23             3.169e+01  6.728e+00   4.710 2.50e-06 ***
## factor(dia)2               4.661e+00  6.010e+00   0.776 0.438043    
## factor(dia)3               1.098e+01  6.018e+00   1.824 0.068110 .  
## factor(dia)4               1.455e+01  6.007e+00   2.421 0.015479 *  
## factor(dia)5               9.545e+00  6.006e+00   1.589 0.112063    
## factor(dia)6               1.350e+01  6.009e+00   2.247 0.024649 *  
## factor(dia)7               2.588e+00  6.003e+00   0.431 0.666368    
## factor(dia)8               5.630e-02  5.999e+00   0.009 0.992512    
## factor(dia)9               1.175e+01  6.008e+00   1.956 0.050487 .  
## factor(dia)10              9.053e+00  6.022e+00   1.503 0.132788    
## factor(dia)11              1.372e+01  6.034e+00   2.273 0.023018 *  
## factor(dia)12              1.172e+01  6.016e+00   1.947 0.051503 .  
## factor(dia)13              1.252e+01  6.023e+00   2.079 0.037677 *  
## factor(dia)14              1.115e+01  6.010e+00   1.855 0.063574 .  
## factor(dia)15              1.767e+01  6.002e+00   2.944 0.003244 ** 
## factor(dia)16              1.196e+01  6.005e+00   1.991 0.046509 *  
## factor(dia)17              2.738e+01  6.007e+00   4.559 5.20e-06 ***
## factor(dia)18              8.182e+00  6.035e+00   1.356 0.175251    
## factor(dia)19              9.357e+00  6.002e+00   1.559 0.119009    
## factor(mes)2               1.029e+01  4.855e+00   2.119 0.034076 *  
## factor(mes)3               3.055e+01  5.131e+00   5.954 2.70e-09 ***
## factor(mes)4               5.394e+01  5.460e+00   9.879  < 2e-16 ***
## factor(mes)5               8.399e+01  6.095e+00  13.781  < 2e-16 ***
## factor(mes)6               7.555e+01  6.780e+00  11.143  < 2e-16 ***
## factor(mes)7               5.105e+01  7.495e+00   6.811 1.02e-11 ***
## factor(mes)8               6.345e+01  7.179e+00   8.838  < 2e-16 ***
## factor(mes)9               8.809e+01  6.600e+00  13.349  < 2e-16 ***
## factor(mes)10              1.013e+02  5.828e+00  17.385  < 2e-16 ***
## factor(mes)11              8.487e+01  5.068e+00  16.745  < 2e-16 ***
## factor(mes)12              8.084e+01  5.032e+00  16.066  < 2e-16 ***
## año                        8.760e+01  1.975e+00  44.359  < 2e-16 ***
## factor(dia_de_la_semana)2  1.931e+00  3.675e+00   0.526 0.599243    
## factor(dia_de_la_semana)3  4.029e+00  3.662e+00   1.100 0.271250    
## factor(dia_de_la_semana)4  6.044e+00  3.671e+00   1.646 0.099719 .  
## factor(dia_de_la_semana)5  1.158e+01  3.695e+00   3.135 0.001725 ** 
## factor(dia_de_la_semana)6  1.360e+01  3.647e+00   3.730 0.000193 ***
## factor(dia_de_la_semana)7 -5.067e+00  3.652e+00  -1.387 0.165368    
## sensacion_termica          4.347e+00  2.519e-01  17.261  < 2e-16 ***
## humedad                   -1.276e+00  6.312e-02 -20.217  < 2e-16 ***
## velocidad_del_viento      -7.129e-01  1.297e-01  -5.497 3.95e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 101.5 on 10823 degrees of freedom
## Multiple R-squared:  0.6877, Adjusted R-squared:  0.6859 
## F-statistic: 384.4 on 62 and 10823 DF,  p-value: < 2.2e-16
# Alternative model for comparison: hour and month enter as plain numeric
# predictors (no factor() encoding); day of month and weekday are left out.
regresion_alt <- lm(
  rentas_totales ~ hora + mes + año + sensacion_termica + humedad +
    velocidad_del_viento,
  data = data
)
summary(regresion_alt)
## 
## Call:
## lm(formula = rentas_totales ~ hora + mes + año + sensacion_termica + 
##     humedad + velocidad_del_viento, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -308.60  -93.85  -28.34   61.05  648.09 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.662e+05  5.496e+03 -30.250  < 2e-16 ***
## hora                  7.734e+00  2.070e-01  37.364  < 2e-16 ***
## mes                   7.574e+00  4.207e-01  18.002  < 2e-16 ***
## año                   8.266e+01  2.732e+00  30.258  < 2e-16 ***
## sensacion_termica     6.172e+00  1.689e-01  36.539  < 2e-16 ***
## humedad              -2.121e+00  7.858e-02 -26.988  < 2e-16 ***
## velocidad_del_viento  6.208e-01  1.771e-01   3.506 0.000457 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 141.7 on 10879 degrees of freedom
## Multiple R-squared:  0.3886, Adjusted R-squared:  0.3883 
## F-statistic:  1153 on 6 and 10879 DF,  p-value: < 2.2e-16

Conclusiones

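Poder explicativo del modelo principal (regresion): R² ≈ 0.69, es decir, explica cerca del 69% de la variabilidad de las rentas totales (frente a R² ≈ 0.39 del modelo alternativo).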
Modelo altamente significativo estadísticamente
Efecto del horario: Picos de rentas en horarios de 8 am y de 5-6 pm.
Efecto mensual con fuerte estacionalidad
Efecto del clima: la sensación térmica afecta de forma positiva a las rentas, mientras que la humedad y la velocidad del viento afectan de forma negativa.

LS0tDQp0aXRsZTogIlJlZ3Jlc2lvbl9saW5lYWwiDQphdXRob3I6ICJEaWVnbyBRdWV2ZWRvIFNhcmFiaWEiDQpkYXRlOiAiMjAyNi0wMi0xNyINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KICAgIHRoZW1lOiBjb3Ntbw0KLS0tDQoNCiMgUmVncmVzacOzbiBMaW5lYWwNCg0KYGBge3J9DQpsaWJyYXJ5KHJlYWRyKQ0KZGF0YSA8LSByZWFkX2NzdigicmVudGFkZWJpY2lzLmNzdiIpICNVU0FSIGZpbGUuY2hvb3NlDQpgYGANCg0KDQojIEVudGVuZGVyIGxhIGRhdGENCmBgYHtyfQ0Kc3RyKGRhdGEpDQpzdW1tYXJ5KGRhdGEpDQpoZWFkKGRhdGEpDQpgYGANCg0KIyBHZW5lcmFyIGVsIG1vZGVsbw0KDQpgYGB7cn0NCnJlZ3Jlc2lvbiA8LSBsbShyZW50YXNfdG90YWxlc35mYWN0b3IoaG9yYSkrZmFjdG9yKGRpYSkrZmFjdG9yKG1lcykrYcOxbytmYWN0b3IoZGlhX2RlX2xhX3NlbWFuYSkrc2Vuc2FjaW9uX3Rlcm1pY2EraHVtZWRhZCt2ZWxvY2lkYWRfZGVsX3ZpZW50bywgZGF0YT1kYXRhKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCg0KYGBge3J9DQpyZWdyZXNpb25fYWx0IDwtIGxtKHJlbnRhc190b3RhbGVzfmhvcmErbWVzK2HDsW8rc2Vuc2FjaW9uX3Rlcm1pY2EraHVtZWRhZCt2ZWxvY2lkYWRfZGVsX3ZpZW50bywgZGF0YT1kYXRhKQ0Kc3VtbWFyeShyZWdyZXNpb25fYWx0KQ0KYGBgDQoNCg0KIyBDb25jbHVzaW9uZXMNClBvZGVyIGV4cGxpY2F0aXZvIGRlbCBtb2RlbG8gPSA2OSUgIA0KTW9kZWxvIGFsdGFtZW50ZSBzaWduaWZpY2F0aXZvIGVzdGFkw61zdGljYW1lbnRlICANCkVmZWN0byBkZWwgaG9yYXJpbzogUGljb3MgZGUgcmVudGFzIGVuIGhvcmFyaW9zIGRlIDggYW0geSBkZSA1LTYgcG0uICANCkVmZWN0byBtZW5zdWFsIGNvbiBmdWVydGUgZXN0YWNpb25hbGlkYWQgIA0KQ2xpbWEgYWZlY3RhIGRlIGZvcm1hIHBvc2l0aXZhIHkgbGEgaHVtZWRhZCB5IHZlbG9jaWRhZCBkZWwgdmllbnRvIGRlIGZvcm1hIG5lZ2F0aXZhLg0K