Una empresa de renta de bicis quiere obtener un pronóstico de rentas mensuales para el próximo año.
library(readxl)
# Load the bike-rental workbook into `bd`.
bd <- read_excel(
  path = "/Users/pedrovillanueva/Desktop/Copia de rentadebicis.xls"
)

# Per-column descriptive statistics, kept in `resumen` and printed for review.
resumen <- summary(object = bd)
print(resumen)
## hora dia mes año
## Min. : 0.00 Min. : 1.000 Min. : 1.000 Min. :2011
## 1st Qu.: 6.00 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:2011
## Median :12.00 Median :10.000 Median : 7.000 Median :2012
## Mean :11.54 Mean : 9.993 Mean : 6.521 Mean :2012
## 3rd Qu.:18.00 3rd Qu.:15.000 3rd Qu.:10.000 3rd Qu.:2012
## Max. :23.00 Max. :19.000 Max. :12.000 Max. :2012
## estacion dia_de_la_semana asueto temperatura
## Min. :1.000 Min. :1.000 Min. :0.00000 Min. : 0.82
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:13.94
## Median :3.000 Median :4.000 Median :0.00000 Median :20.50
## Mean :2.507 Mean :4.014 Mean :0.02857 Mean :20.23
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:0.00000 3rd Qu.:26.24
## Max. :4.000 Max. :7.000 Max. :1.00000 Max. :41.00
## sensacion_termica humedad velocidad_del_viento
## Min. : 0.76 Min. : 0.00 Min. : 0.000
## 1st Qu.:16.66 1st Qu.: 47.00 1st Qu.: 7.002
## Median :24.24 Median : 62.00 Median :12.998
## Mean :23.66 Mean : 61.89 Mean :12.799
## 3rd Qu.:31.06 3rd Qu.: 77.00 3rd Qu.:16.998
## Max. :45.45 Max. :100.00 Max. :56.997
## rentas_de_no_registrados rentas_de_registrados rentas_totales
## Min. : 0.00 Min. : 0.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42.0
## Median : 17.00 Median :118.0 Median :145.0
## Mean : 36.02 Mean :155.6 Mean :191.6
## 3rd Qu.: 49.00 3rd Qu.:222.0 3rd Qu.:284.0
## Max. :367.00 Max. :886.0 Max. :977.0
# Scatter plot of total rentals against temperature, used to eyeball how
# temperature relates to demand before fitting the regression.
# The title previously read "Influecnia"; the typo is corrected here.
plot(
  x = bd$temperatura,
  y = bd$rentas_totales,
  main = "Influencia de la temperatura sobre las rentas",
  xlab = "Temperatura",
  ylab = "Rentas totales"
)
# Full linear model: total rentals explained by every candidate predictor.
# All predictors enter as plain numeric terms, in this exact order, so the
# coefficient table lists them in the same sequence.
regresion <- lm(
  formula = rentas_totales ~ hora + dia_de_la_semana + asueto + mes +
    estacion + sensacion_termica + temperatura,
  data = bd
)

# Show coefficients, significance levels and goodness of fit.
print(summary(regresion))
##
## Call:
## lm(formula = rentas_totales ~ hora + dia_de_la_semana + asueto +
## mes + estacion + sensacion_termica + temperatura, data = bd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -322.17 -101.13 -31.04 58.30 679.97
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -100.4525 6.6559 -15.092 < 2e-16 ***
## hora 9.2886 0.2152 43.170 < 2e-16 ***
## dia_de_la_semana 0.9704 0.7482 1.297 0.19468
## asueto -2.8075 9.0568 -0.310 0.75657
## mes 5.6524 1.8145 3.115 0.00184 **
## estacion -3.5792 5.6027 -0.639 0.52295
## sensacion_termica 1.0030 1.0066 0.996 0.31909
## temperatura 6.3957 1.0936 5.848 5.11e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 153.4 on 10878 degrees of freedom
## Multiple R-squared: 0.2832, Adjusted R-squared: 0.2827
## F-statistic: 613.9 on 7 and 10878 DF, p-value: < 2.2e-16
# Reduced linear model that keeps only hour, month and temperature as
# predictors of total rentals.
regresion_ajustada <- lm(
  formula = rentas_totales ~ hora + mes + temperatura,
  data = bd
)

# Show coefficients, significance levels and goodness of fit.
print(summary(regresion_ajustada))
##
## Call:
## lm(formula = rentas_totales ~ hora + mes + temperatura, data = bd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -323.97 -101.52 -30.82 58.43 676.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -96.1240 4.9236 -19.52 <2e-16 ***
## hora 9.2873 0.2151 43.17 <2e-16 ***
## mes 4.5601 0.4423 10.31 <2e-16 ***
## temperatura 7.4524 0.1976 37.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 153.4 on 10882 degrees of freedom
## Multiple R-squared: 0.283, Adjusted R-squared: 0.2828
## F-statistic: 1431 on 3 and 10882 DF, p-value: < 2.2e-16
# Single scenario to score with the reduced model: hour 12, month 6,
# temperature 29.12.
# NOTE(review): this yields a prediction for one hour only, while the stated
# goal at the top of the file is a monthly forecast — confirm whether the
# result should be aggregated.
datos <- data.frame(
  hora = 12,
  mes = 6,
  temperatura = 29.12
)

# Predicted total rentals for the scenario above.
print(predict(object = regresion_ajustada, newdata = datos))
## 1
## 259.699