# Load the rentadebicis.csv dataset into `df`.
# (Run file.choose() interactively to locate the file on another machine.)
df <- read.csv(file = "/cloud/project/rentadebicis.csv")
# Print summary statistics for every column of `df`.
summary(object = df)
## hora dia mes año
## Min. : 0.00 Min. : 1.000 Min. : 1.000 Min. :2011
## 1st Qu.: 6.00 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:2011
## Median :12.00 Median :10.000 Median : 7.000 Median :2012
## Mean :11.54 Mean : 9.993 Mean : 6.521 Mean :2012
## 3rd Qu.:18.00 3rd Qu.:15.000 3rd Qu.:10.000 3rd Qu.:2012
## Max. :23.00 Max. :19.000 Max. :12.000 Max. :2012
## estacion dia_de_la_semana asueto temperatura
## Min. :1.000 Min. :1.000 Min. :0.00000 Min. : 0.82
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:13.94
## Median :3.000 Median :4.000 Median :0.00000 Median :20.50
## Mean :2.507 Mean :4.014 Mean :0.02857 Mean :20.23
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:0.00000 3rd Qu.:26.24
## Max. :4.000 Max. :7.000 Max. :1.00000 Max. :41.00
## sensacion_termica humedad velocidad_del_viento
## Min. : 0.76 Min. : 0.00 Min. : 0.000
## 1st Qu.:16.66 1st Qu.: 47.00 1st Qu.: 7.002
## Median :24.24 Median : 62.00 Median :12.998
## Mean :23.66 Mean : 61.89 Mean :12.799
## 3rd Qu.:31.06 3rd Qu.: 77.00 3rd Qu.:16.998
## Max. :45.45 Max. :100.00 Max. :56.997
## rentas_de_no_registrados rentas_de_registrados rentas_totales
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42.0
## Median : 17.00 Median :118.0 Median :145.0
## Mean : 36.02 Mean :155.6 Mean :191.6
## 3rd Qu.: 49.00 3rd Qu.:222.0 3rd Qu.:284.0
## Max. :367.00 Max. :886.0 Max. :977.0
# Show the structure of `df`: dimensions plus each column's type and first values.
str(object = df)
## 'data.frame': 10886 obs. of 14 variables:
## $ hora : int 0 1 2 3 4 5 6 7 8 9 ...
## $ dia : int 1 1 1 1 1 1 1 1 1 1 ...
## $ mes : int 1 1 1 1 1 1 1 1 1 1 ...
## $ año : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ estacion : int 1 1 1 1 1 1 1 1 1 1 ...
## $ dia_de_la_semana : int 6 6 6 6 6 6 6 6 6 6 ...
## $ asueto : int 0 0 0 0 0 0 0 0 0 0 ...
## $ temperatura : num 9.84 9.02 9.02 9.84 9.84 ...
## $ sensacion_termica : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humedad : int 81 80 80 75 75 75 80 86 75 76 ...
## $ velocidad_del_viento : num 0 0 0 0 0 ...
## $ rentas_de_no_registrados: int 3 8 5 3 0 0 2 1 1 8 ...
## $ rentas_de_registrados : int 13 32 27 10 1 1 0 2 7 6 ...
## $ rentas_totales : int 16 40 32 13 1 1 2 3 8 14 ...
# Full model: linear regression (lm = linear model) of rentas_totales on
# seven predictors.
# NOTE(review): estacion and dia_de_la_semana are integer columns and enter the
# formula as plain numeric terms -- confirm that is intended.
regresion <- lm(
  formula = rentas_totales ~ estacion + dia_de_la_semana + asueto +
    temperatura + sensacion_termica + humedad + velocidad_del_viento,
  data = df
)
# Print the coefficient table and goodness-of-fit statistics.
summary(object = regresion)
##
## Call:
## lm(formula = rentas_totales ~ estacion + dia_de_la_semana + asueto +
## temperatura + sensacion_termica + humedad + velocidad_del_viento,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -321.77 -102.22 -31.89 66.27 672.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 135.91620 9.08271 14.964 < 2e-16 ***
## estacion 22.56960 1.42459 15.843 < 2e-16 ***
## dia_de_la_semana 0.36815 0.76078 0.484 0.6285
## asueto -7.68641 9.14519 -0.840 0.4007
## temperatura 1.89084 1.14124 1.657 0.0976 .
## sensacion_termica 5.58885 1.05009 5.322 1.05e-07 ***
## humedad -2.96443 0.08359 -35.462 < 2e-16 ***
## velocidad_del_viento 0.84588 0.19811 4.270 1.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155.8 on 10878 degrees of freedom
## Multiple R-squared: 0.2606, Adjusted R-squared: 0.2601
## F-statistic: 547.6 on 7 and 10878 DF, p-value: < 2.2e-16
# Draw the standard lm diagnostic plots for the model currently in `regresion`.
plot(x = regresion)
# Reduced model (lm = linear model): same response, but without
# dia_de_la_semana, asueto and temperatura.
# This overwrites `regresion`, so the previous fit is no longer available.
regresion <- lm(
  formula = rentas_totales ~ estacion + sensacion_termica +
    humedad + velocidad_del_viento,
  data = df
)
# Print the coefficient table and goodness-of-fit statistics.
summary(object = regresion)
##
## Call:
## lm(formula = rentas_totales ~ estacion + sensacion_termica +
## humedad + velocidad_del_viento, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -323.84 -102.14 -32.15 66.63 671.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 134.76923 8.17509 16.485 < 2e-16 ***
## estacion 22.60282 1.42319 15.882 < 2e-16 ***
## sensacion_termica 7.29949 0.18391 39.690 < 2e-16 ***
## humedad -2.97488 0.08336 -35.687 < 2e-16 ***
## velocidad_del_viento 0.90497 0.19408 4.663 3.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155.8 on 10881 degrees of freedom
## Multiple R-squared: 0.2603, Adjusted R-squared: 0.26
## F-statistic: 957.3 on 4 and 10881 DF, p-value: < 2.2e-16
# Draw the standard lm diagnostic plots for the model currently in `regresion`.
plot(x = regresion)
# One-row data frame with a value for each predictor used by the model.
datos <- data.frame(
  estacion = 1,
  sensacion_termica = 24,
  humedad = 62,
  velocidad_del_viento = 13
)
# Predicted rentas_totales for that single observation.
predict(object = regresion, newdata = datos)
## 1
## 159.8819