Regresión lineal

# Load the bike-rental dataset.
# The default path is machine-specific. When it does not exist and the session
# is interactive, fall back to a file picker (file.choose()) instead of
# failing; in a non-interactive run read.csv() still reports the missing file.
ruta_datos <- "/Users/eduardojuniormedinahernandez/Downloads/rentadebicis.csv"
if (!file.exists(ruta_datos) && interactive()) {
  ruta_datos <- file.choose()
}
data <- read.csv(ruta_datos)

# Show the column names, types, and first values of the loaded data frame.
str(data)
## 'data.frame':    10886 obs. of  14 variables:
##  $ hora                    : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ dia                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ mes                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ año                     : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
##  $ estacion                : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ dia_de_la_semana        : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ asueto                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ temperatura             : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ sensacion_termica       : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humedad                 : int  81 80 80 75 75 75 80 86 75 76 ...
##  $ velocidad_del_viento    : num  0 0 0 0 0 ...
##  $ rentas_de_no_registrados: int  3 8 5 3 0 0 2 1 1 8 ...
##  $ rentas_de_registrados   : int  13 32 27 10 1 1 0 2 7 6 ...
##  $ rentas_totales          : int  16 40 32 13 1 1 2 3 8 14 ...
# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = data)
##       hora            dia              mes              año      
##  Min.   : 0.00   Min.   : 1.000   Min.   : 1.000   Min.   :2011  
##  1st Qu.: 6.00   1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:2011  
##  Median :12.00   Median :10.000   Median : 7.000   Median :2012  
##  Mean   :11.54   Mean   : 9.993   Mean   : 6.521   Mean   :2012  
##  3rd Qu.:18.00   3rd Qu.:15.000   3rd Qu.:10.000   3rd Qu.:2012  
##  Max.   :23.00   Max.   :19.000   Max.   :12.000   Max.   :2012  
##     estacion     dia_de_la_semana     asueto         temperatura   
##  Min.   :1.000   Min.   :1.000    Min.   :0.00000   Min.   : 0.82  
##  1st Qu.:2.000   1st Qu.:2.000    1st Qu.:0.00000   1st Qu.:13.94  
##  Median :3.000   Median :4.000    Median :0.00000   Median :20.50  
##  Mean   :2.507   Mean   :4.014    Mean   :0.02857   Mean   :20.23  
##  3rd Qu.:4.000   3rd Qu.:6.000    3rd Qu.:0.00000   3rd Qu.:26.24  
##  Max.   :4.000   Max.   :7.000    Max.   :1.00000   Max.   :41.00  
##  sensacion_termica    humedad       velocidad_del_viento
##  Min.   : 0.76     Min.   :  0.00   Min.   : 0.000      
##  1st Qu.:16.66     1st Qu.: 47.00   1st Qu.: 7.002      
##  Median :24.24     Median : 62.00   Median :12.998      
##  Mean   :23.66     Mean   : 61.89   Mean   :12.799      
##  3rd Qu.:31.06     3rd Qu.: 77.00   3rd Qu.:16.998      
##  Max.   :45.45     Max.   :100.00   Max.   :56.997      
##  rentas_de_no_registrados rentas_de_registrados rentas_totales 
##  Min.   :  0.00           Min.   :  0.0         Min.   :  1.0  
##  1st Qu.:  4.00           1st Qu.: 36.0         1st Qu.: 42.0  
##  Median : 17.00           Median :118.0         Median :145.0  
##  Mean   : 36.02           Mean   :155.6         Mean   :191.6  
##  3rd Qu.: 49.00           3rd Qu.:222.0         3rd Qu.:284.0  
##  Max.   :367.00           Max.   :886.0         Max.   :977.0

Generación de modelo

# Confidence level = 1 - significance level.
# The significance level of each coefficient is read from the symbol printed
# to the right of its row; the "Signif. codes" legend in the summary output
# gives the threshold that each symbol stands for.
# hora, dia and mes enter as factors (one dummy per level); the remaining
# predictors enter as numeric terms.
regresion <- lm(
  rentas_totales ~ factor(hora) + factor(dia) + factor(mes) + año +
    temperatura + sensacion_termica + humedad + velocidad_del_viento,
  data = data
)
summary(object = regresion)
## 
## Call:
## lm(formula = rentas_totales ~ factor(hora) + factor(dia) + factor(mes) + 
##     año + temperatura + sensacion_termica + humedad + velocidad_del_viento, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -380.09  -62.00   -6.52   52.64  440.33 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.752e+05  3.989e+03 -43.915  < 2e-16 ***
## factor(hora)1        -1.763e+01  6.745e+00  -2.614 0.008967 ** 
## factor(hora)2        -2.753e+01  6.771e+00  -4.066 4.82e-05 ***
## factor(hora)3        -3.836e+01  6.836e+00  -5.611 2.06e-08 ***
## factor(hora)4        -3.892e+01  6.807e+00  -5.717 1.11e-08 ***
## factor(hora)5        -2.257e+01  6.773e+00  -3.333 0.000861 ***
## factor(hora)6         3.617e+01  6.765e+00   5.347 9.12e-08 ***
## factor(hora)7         1.698e+02  6.755e+00  25.140  < 2e-16 ***
## factor(hora)8         3.133e+02  6.745e+00  46.447  < 2e-16 ***
## factor(hora)9         1.636e+02  6.747e+00  24.243  < 2e-16 ***
## factor(hora)10        1.071e+02  6.771e+00  15.821  < 2e-16 ***
## factor(hora)11        1.327e+02  6.814e+00  19.476  < 2e-16 ***
## factor(hora)12        1.711e+02  6.862e+00  24.931  < 2e-16 ***
## factor(hora)13        1.658e+02  6.917e+00  23.975  < 2e-16 ***
## factor(hora)14        1.476e+02  6.959e+00  21.216  < 2e-16 ***
## factor(hora)15        1.576e+02  6.970e+00  22.616  < 2e-16 ***
## factor(hora)16        2.214e+02  6.959e+00  31.806  < 2e-16 ***
## factor(hora)17        3.783e+02  6.918e+00  54.688  < 2e-16 ***
## factor(hora)18        3.453e+02  6.872e+00  50.247  < 2e-16 ***
## factor(hora)19        2.372e+02  6.815e+00  34.802  < 2e-16 ***
## factor(hora)20        1.560e+02  6.779e+00  23.007  < 2e-16 ***
## factor(hora)21        1.065e+02  6.755e+00  15.761  < 2e-16 ***
## factor(hora)22        7.151e+01  6.744e+00  10.603  < 2e-16 ***
## factor(hora)23        3.158e+01  6.738e+00   4.687 2.80e-06 ***
## factor(dia)2          4.115e+00  6.008e+00   0.685 0.493343    
## factor(dia)3          1.075e+01  6.020e+00   1.785 0.074300 .  
## factor(dia)4          1.415e+01  6.008e+00   2.355 0.018541 *  
## factor(dia)5          9.417e+00  6.007e+00   1.568 0.116995    
## factor(dia)6          1.317e+01  6.016e+00   2.189 0.028625 *  
## factor(dia)7          3.379e+00  6.001e+00   0.563 0.573420    
## factor(dia)8          7.148e-02  6.007e+00   0.012 0.990506    
## factor(dia)9          1.130e+01  6.005e+00   1.883 0.059793 .  
## factor(dia)10         8.864e+00  6.024e+00   1.471 0.141221    
## factor(dia)11         1.323e+01  6.034e+00   2.192 0.028415 *  
## factor(dia)12         1.143e+01  6.016e+00   1.900 0.057513 .  
## factor(dia)13         1.172e+01  6.025e+00   1.945 0.051821 .  
## factor(dia)14         1.209e+01  6.009e+00   2.012 0.044210 *  
## factor(dia)15         1.767e+01  6.010e+00   2.940 0.003290 ** 
## factor(dia)16         1.170e+01  6.002e+00   1.950 0.051235 .  
## factor(dia)17         2.636e+01  6.034e+00   4.369 1.26e-05 ***
## factor(dia)18         7.887e+00  6.036e+00   1.307 0.191323    
## factor(dia)19         9.206e+00  6.003e+00   1.534 0.125133    
## factor(mes)2          1.157e+01  4.854e+00   2.384 0.017121 *  
## factor(mes)3          3.138e+01  5.153e+00   6.090 1.17e-09 ***
## factor(mes)4          5.375e+01  5.508e+00   9.760  < 2e-16 ***
## factor(mes)5          8.376e+01  6.237e+00  13.429  < 2e-16 ***
## factor(mes)6          7.427e+01  7.103e+00  10.456  < 2e-16 ***
## factor(mes)7          4.833e+01  8.002e+00   6.040 1.59e-09 ***
## factor(mes)8          6.097e+01  7.789e+00   7.827 5.46e-15 ***
## factor(mes)9          8.688e+01  6.904e+00  12.583  < 2e-16 ***
## factor(mes)10         1.011e+02  5.942e+00  17.016  < 2e-16 ***
## factor(mes)11         8.579e+01  5.084e+00  16.874  < 2e-16 ***
## factor(mes)12         8.165e+01  5.035e+00  16.215  < 2e-16 ***
## año                   8.709e+01  1.983e+00  43.912  < 2e-16 ***
## temperatura           1.486e+00  8.575e-01   1.733 0.083086 .  
## sensacion_termica     3.096e+00  7.152e-01   4.329 1.51e-05 ***
## humedad              -1.297e+00  6.283e-02 -20.650  < 2e-16 ***
## velocidad_del_viento -7.830e-01  1.329e-01  -5.893 3.92e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 101.7 on 10828 degrees of freedom
## Multiple R-squared:  0.6867, Adjusted R-squared:  0.685 
## F-statistic: 416.4 on 57 and 10828 DF,  p-value: < 2.2e-16

Generar pronósticos

# Forecast scenario: one row per month (1-12) of 2013, at hour 12 on day 1,
# with the weather inputs held constant across rows. The scalar arguments are
# recycled by data.frame() to match the 12 values of `mes`.
# estacion, dia_de_la_semana and asueto are not terms of the fitted model
# formula; they are kept so the scenario mirrors the input columns.
datos_nuevos <- data.frame(
  hora = 12,
  dia = 1,
  mes = seq_len(12),
  año = 2013,
  estacion = 1,
  dia_de_la_semana = 1,
  asueto = 0,
  temperatura = 25,
  sensacion_termica = 24,
  humedad = 62,
  velocidad_del_viento = 13
)

# Predicted rentas_totales for each of the 12 scenario rows.
predict(object = regresion, newdata = datos_nuevos)
##        1        2        3        4        5        6        7        8 
## 306.0648 317.6388 337.4445 359.8187 389.8275 380.3322 354.3967 367.0306 
##        9       10       11       12 
## 392.9444 407.1744 391.8518 387.7119

Conclusiones

Modelo altamente significativo (valor p < 2.2e-16) con un poder explicativo del 69% (R² = 0.6867). Picos de rentas a las 8 a. m. y entre las 5 y las 6 p. m. Efecto mensual con fuerte estacionalidad.