Regresión Lineal

data <- read.csv("/Users/marceloreyesp/Desktop/Negocios/6to Semestre LIT/Clase Codigos/Clase Codigo R/rentadebicis.csv") 

str(data)
## 'data.frame':    10886 obs. of  14 variables:
##  $ hora                    : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ dia                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ mes                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ año                     : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
##  $ estacion                : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ dia_de_la_semana        : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ asueto                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ temperatura             : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ sensacion_termica       : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humedad                 : int  81 80 80 75 75 75 80 86 75 76 ...
##  $ velocidad_del_viento    : num  0 0 0 0 0 ...
##  $ rentas_de_no_registrados: int  3 8 5 3 0 0 2 1 1 8 ...
##  $ rentas_de_registrados   : int  13 32 27 10 1 1 0 2 7 6 ...
##  $ rentas_totales          : int  16 40 32 13 1 1 2 3 8 14 ...
summary(data)
##       hora            dia              mes              año      
##  Min.   : 0.00   Min.   : 1.000   Min.   : 1.000   Min.   :2011  
##  1st Qu.: 6.00   1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:2011  
##  Median :12.00   Median :10.000   Median : 7.000   Median :2012  
##  Mean   :11.54   Mean   : 9.993   Mean   : 6.521   Mean   :2012  
##  3rd Qu.:18.00   3rd Qu.:15.000   3rd Qu.:10.000   3rd Qu.:2012  
##  Max.   :23.00   Max.   :19.000   Max.   :12.000   Max.   :2012  
##     estacion     dia_de_la_semana     asueto         temperatura   
##  Min.   :1.000   Min.   :1.000    Min.   :0.00000   Min.   : 0.82  
##  1st Qu.:2.000   1st Qu.:2.000    1st Qu.:0.00000   1st Qu.:13.94  
##  Median :3.000   Median :4.000    Median :0.00000   Median :20.50  
##  Mean   :2.507   Mean   :4.014    Mean   :0.02857   Mean   :20.23  
##  3rd Qu.:4.000   3rd Qu.:6.000    3rd Qu.:0.00000   3rd Qu.:26.24  
##  Max.   :4.000   Max.   :7.000    Max.   :1.00000   Max.   :41.00  
##  sensacion_termica    humedad       velocidad_del_viento
##  Min.   : 0.76     Min.   :  0.00   Min.   : 0.000      
##  1st Qu.:16.66     1st Qu.: 47.00   1st Qu.: 7.002      
##  Median :24.24     Median : 62.00   Median :12.998      
##  Mean   :23.66     Mean   : 61.89   Mean   :12.799      
##  3rd Qu.:31.06     3rd Qu.: 77.00   3rd Qu.:16.998      
##  Max.   :45.45     Max.   :100.00   Max.   :56.997      
##  rentas_de_no_registrados rentas_de_registrados rentas_totales 
##  Min.   :  0.00           Min.   :  0.0         Min.   :  1.0  
##  1st Qu.:  4.00           1st Qu.: 36.0         1st Qu.: 42.0  
##  Median : 17.00           Median :118.0         Median :145.0  
##  Mean   : 36.02           Mean   :155.6         Mean   :191.6  
##  3rd Qu.: 49.00           3rd Qu.:222.0         3rd Qu.:284.0  
##  Max.   :367.00           Max.   :886.0         Max.   :977.0

Identificar las variables que son factores (primero debemos convertir las numéricas a categóricas)

data$estacion <- as.factor(data$estacion)
data$hora <- as.factor(data$hora)
data$dia_de_la_semana <- as.factor(data$dia_de_la_semana)
data$asueto <- as.factor(data$asueto)

names(Filter(is.factor, data))
## [1] "hora"             "estacion"         "dia_de_la_semana" "asueto"
modelo_lineal <- lm(rentas_totales ~ temperatura, data = data)
summary(modelo_lineal)
## 
## Call:
## lm(formula = rentas_totales ~ temperatura, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -293.32 -112.36  -33.36   78.98  741.44 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.0462     4.4394   1.362    0.173    
## temperatura   9.1705     0.2048  44.783   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 166.5 on 10884 degrees of freedom
## Multiple R-squared:  0.1556, Adjusted R-squared:  0.1555 
## F-statistic:  2006 on 1 and 10884 DF,  p-value: < 2.2e-16
modelo_multiple <- lm(rentas_totales ~ humedad + velocidad_del_viento + hora + estacion + dia_de_la_semana, data = data)
summary(modelo_multiple)
## 
## Call:
## lm(formula = rentas_totales ~ humedad + velocidad_del_viento + 
##     hora + estacion + dia_de_la_semana, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -403.63  -61.83  -11.09   52.62  519.17 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           80.24406    8.08889   9.920  < 2e-16 ***
## humedad               -1.45147    0.06833 -21.243  < 2e-16 ***
## velocidad_del_viento  -1.10935    0.14483  -7.660 2.02e-14 ***
## hora1                -19.31309    7.61797  -2.535 0.011252 *  
## hora2                -30.24205    7.64475  -3.956 7.67e-05 ***
## hora3                -40.83087    7.71431  -5.293 1.23e-07 ***
## hora4                -42.79988    7.67805  -5.574 2.54e-08 ***
## hora5                -28.96085    7.63397  -3.794 0.000149 ***
## hora6                 29.32168    7.62325   3.846 0.000121 ***
## hora7                164.29050    7.61877  21.564  < 2e-16 ***
## hora8                310.97559    7.61580  40.833  < 2e-16 ***
## hora9                165.31132    7.62043  21.693  < 2e-16 ***
## hora10               112.73593    7.63526  14.765  < 2e-16 ***
## hora11               142.30691    7.65942  18.579  < 2e-16 ***
## hora12               183.79786    7.68379  23.920  < 2e-16 ***
## hora13               181.67158    7.71341  23.553  < 2e-16 ***
## hora14               165.51687    7.73500  21.398  < 2e-16 ***
## hora15               176.07138    7.73928  22.750  < 2e-16 ***
## hora16               239.03832    7.73691  30.896  < 2e-16 ***
## hora17               393.88037    7.71538  51.051  < 2e-16 ***
## hora18               358.59085    7.69107  46.624  < 2e-16 ***
## hora19               247.33762    7.65629  32.305  < 2e-16 ***
## hora20               163.53783    7.63443  21.421  < 2e-16 ***
## hora21               111.66580    7.61913  14.656  < 2e-16 ***
## hora22                74.79344    7.61337   9.824  < 2e-16 ***
## hora23                32.89484    7.60957   4.323 1.55e-05 ***
## estacion2            106.84233    3.13637  34.066  < 2e-16 ***
## estacion3            128.68249    3.18039  40.461  < 2e-16 ***
## estacion4             96.32727    3.19957  30.106  < 2e-16 ***
## dia_de_la_semana2      1.99237    4.13355   0.482 0.629816    
## dia_de_la_semana3      3.69224    4.12665   0.895 0.370952    
## dia_de_la_semana4      4.90319    4.13118   1.187 0.235304    
## dia_de_la_semana5      5.37441    4.14355   1.297 0.194640    
## dia_de_la_semana6      8.65941    4.10387   2.110 0.034876 *  
## dia_de_la_semana7     -7.47591    4.10634  -1.821 0.068699 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 114.8 on 10851 degrees of freedom
## Multiple R-squared:  0.5994, Adjusted R-squared:  0.5982 
## F-statistic: 477.6 on 34 and 10851 DF,  p-value: < 2.2e-16

Generar pronósticos

datos_nuevos <- data.frame(
  hora=12,
  estacion=1,
  dia=1,
  mes=1:12,
  ano=2013,
  dia_de_la_semana=1,
  sensacion_termica=24,
  humedad=62,
  velocidad_del_viento=13
)

Convertir a factores para coincidir con el modelo

datos_nuevos$hora <- factor(datos_nuevos$hora, levels = levels(data$hora))
datos_nuevos$estacion <- factor(datos_nuevos$estacion, levels = levels(data$estacion))
datos_nuevos$dia_de_la_semana <- factor(datos_nuevos$dia_de_la_semana, levels = levels(data$dia_de_la_semana))

predict(modelo_multiple, datos_nuevos)
##        1        2        3        4        5        6        7        8 
## 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291 159.6291 
##        9       10       11       12 
## 159.6291 159.6291 159.6291 159.6291

Conclusión

Modelo altamente significativo con un poder explicativo del 69% Hay picos de rentas en horarios de 8 am y 5-6pm Efecto mensual con fuerte estacionalidad en los meses 10,11 y 12

LS0tCnRpdGxlOiAiUmVncmVzacOzbiBMaW5lYWwiCmF1dGhvcjogIk1hcmNlbG8gUmV5ZXMgQTAxNzIzMzIxIgpkYXRlOiAiMjAyNi0wMi0xNyIKb3V0cHV0OiAKICAgIGh0bWxfZG9jdW1lbnQ6CiAgICAgICAgdG9jOiBUUlVFCiAgICAgICAgdG9jX2Zsb2F0OiBUUlVFCiAgICAgICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgICAgIHRoZW1lOiBjb3NtbwotLS0gCgojIFJlZ3Jlc2nDs24gTGluZWFsCmBgYHtyfQpkYXRhIDwtIHJlYWQuY3N2KCIvVXNlcnMvbWFyY2Vsb3JleWVzcC9EZXNrdG9wL05lZ29jaW9zLzZ0byBTZW1lc3RyZSBMSVQvQ2xhc2UgQ29kaWdvcy9DbGFzZSBDb2RpZ28gUi9yZW50YWRlYmljaXMuY3N2IikgCgpzdHIoZGF0YSkKc3VtbWFyeShkYXRhKQoKYGBgCiMgSWRlbnRpZmljYXIgbGFzIHZhcmlhYmxlcyBxdWUgc29uIGZhY3RvcmVzIChwcmltZXJvIGRlYmVtb3MgY29udmVydGlyIGxhcyBudW3DqXJpY2FzIGEgY2F0ZWfDs3JpY2FzKQpgYGB7cn0KZGF0YSRlc3RhY2lvbiA8LSBhcy5mYWN0b3IoZGF0YSRlc3RhY2lvbikKZGF0YSRob3JhIDwtIGFzLmZhY3RvcihkYXRhJGhvcmEpCmRhdGEkZGlhX2RlX2xhX3NlbWFuYSA8LSBhcy5mYWN0b3IoZGF0YSRkaWFfZGVfbGFfc2VtYW5hKQpkYXRhJGFzdWV0byA8LSBhcy5mYWN0b3IoZGF0YSRhc3VldG8pCgpuYW1lcyhGaWx0ZXIoaXMuZmFjdG9yLCBkYXRhKSkKCgptb2RlbG9fbGluZWFsIDwtIGxtKHJlbnRhc190b3RhbGVzIH4gdGVtcGVyYXR1cmEsIGRhdGEgPSBkYXRhKQpzdW1tYXJ5KG1vZGVsb19saW5lYWwpCgoKCm1vZGVsb19tdWx0aXBsZSA8LSBsbShyZW50YXNfdG90YWxlcyB+IGh1bWVkYWQgKyB2ZWxvY2lkYWRfZGVsX3ZpZW50byArIGhvcmEgKyBlc3RhY2lvbiArIGRpYV9kZV9sYV9zZW1hbmEsIGRhdGEgPSBkYXRhKQpzdW1tYXJ5KG1vZGVsb19tdWx0aXBsZSkKYGBgCiMgR2VuZXJhciBwcm9uw7NzdGljb3MKYGBge3J9CmRhdG9zX251ZXZvcyA8LSBkYXRhLmZyYW1lKAogIGhvcmE9MTIsCiAgZXN0YWNpb249MSwKICBkaWE9MSwKICBtZXM9MToxMiwKICBhbm89MjAxMywKICBkaWFfZGVfbGFfc2VtYW5hPTEsCiAgc2Vuc2FjaW9uX3Rlcm1pY2E9MjQsCiAgaHVtZWRhZD02MiwKICB2ZWxvY2lkYWRfZGVsX3ZpZW50bz0xMwopCgpgYGAKIyBDb252ZXJ0aXIgYSBmYWN0b3JlcyBwYXJhIGNvaW5jaWRpciBjb24gZWwgbW9kZWxvCmBgYHtyfQpkYXRvc19udWV2b3MkaG9yYSA8LSBmYWN0b3IoZGF0b3NfbnVldm9zJGhvcmEsIGxldmVscyA9IGxldmVscyhkYXRhJGhvcmEpKQpkYXRvc19udWV2b3MkZXN0YWNpb24gPC0gZmFjdG9yKGRhdG9zX251ZXZvcyRlc3RhY2lvbiwgbGV2ZWxzID0gbGV2ZWxzKGRhdGEkZXN0YWNpb24pKQpkYXRvc19udWV2b3MkZGlhX2RlX2xhX3NlbWFuYSA8LSBmYWN0b3IoZGF0b3NfbnVldm9zJGRpYV9kZV9sYV9zZW1hbmEsIGxldmVscyA9IGxldmVscyhkYXRhJGRpYV9kZV9sYV9zZW1hbmEpKQoKcHJlZGljdChtb2RlbG9fbXVsdGlwbGUsIGRhdG9zX251ZXZvcykKYGBgCgojIENvbmNsdXNpw7NuCk1vZGVsbyBhbHRhbWVudGUgc2lnbmlmaWNhdGl2byBjb24gdW4gcG9kZXIgZXhwbGljYXRpdm8gZGVsIDY5JQpIYXkgcGljb3MgZGUgcmVudGFzIGVuIGhvcmFyaW9zIGRlIDggYW0geSA1LTZwbQpFZmVjdG8gbWVuc3VhbCBjb24gZnVlcnRlIGVzdGFjaW9uYWxpZGFkIGVuIGxvcyBtZXNlcyAxMCwxMSB5IDEyCgo=