library(rJava)
library(xlsx)
## Loading required package: xlsxjars
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(tidyr)
library(broom)
library(scatterplot3d)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
taxi_SD<-read.xlsx(file = "taxi_supply_demand.xlsx",sheetIndex = 1)
head(taxi_SD)
str(taxi_SD)
## 'data.frame': 168 obs. of 4 variables:
## $ period : POSIXct, format: "2017-10-09 00:00:00" "2017-10-09 00:59:59" ...
## $ drivers.online: num 281 220 164 150 161 190 222 294 361 407 ...
## $ eyeballs : num 180 104 62 51 43 37 59 120 185 202 ...
## $ rides : num 84 50 21 21 20 18 24 43 99 104 ...
cor(taxi_SD[2:4])
## drivers.online eyeballs rides
## drivers.online 1.0000000 0.9294869 0.9268260
## eyeballs 0.9294869 1.0000000 0.9807348
## rides 0.9268260 0.9807348 1.0000000
taxi_SD$weekday <- weekdays(taxi_SD$period)
taxi_SD$Hour <- format(taxi_SD$period,"%H")
ggpairs(taxi_SD, aes(colour = weekday, alpha = 0.4),columns = c(2:4))

summary(aov(rides~weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## weekday 6 46121 7687 2.099 0.0561 .
## Residuals 161 589508 3662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=weekday, y=rides, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(eyeballs~weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## weekday 6 92782 15464 1.348 0.239
## Residuals 161 1846258 11467
ggplot(taxi_SD, aes(x=weekday, y=eyeballs, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(drivers.online~weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## weekday 6 301894 50316 2.599 0.0197 *
## Residuals 161 3117105 19361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=weekday, y=drivers.online, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(rides~Hour,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 490205 32680 34.16 <2e-16 ***
## Residuals 152 145424 957
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=rides, color=Hour)) + geom_boxplot()

summary(aov(eyeballs~Hour,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 1631616 108774 53.78 <2e-16 ***
## Residuals 152 307424 2023
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=eyeballs, color=Hour)) + geom_boxplot()

summary(aov(drivers.online~Hour,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 2635319 175688 34.08 <2e-16 ***
## Residuals 152 783680 5156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=drivers.online, color=Hour)) + geom_boxplot()

summary(aov(rides~Hour+weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 490205 32680 48.05 < 2e-16 ***
## weekday 6 46121 7687 11.30 2.38e-10 ***
## Residuals 146 99302 680
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=rides, color=weekday)) + geom_boxplot()

summary(aov(eyeballs~Hour+weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 1631616 108774 73.99 < 2e-16 ***
## weekday 6 92782 15464 10.52 1.1e-09 ***
## Residuals 146 214642 1470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=eyeballs, color=weekday)) + geom_boxplot()

summary(aov(drivers.online~Hour+weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 2635319 175688 53.24 < 2e-16 ***
## weekday 6 301894 50316 15.25 1.62e-13 ***
## Residuals 146 481786 3300
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=drivers.online, color=weekday)) + geom_boxplot()

summary(aov(rides~Hour*weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 490205 32680 67.714 < 2e-16 ***
## weekday 6 46121 7687 15.927 1.41e-10 ***
## Hour:weekday 90 72275 803 1.664 0.0207 *
## Residuals 56 27027 483
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(eyeballs~Hour*weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 1631616 108774 100.92 < 2e-16 ***
## weekday 6 92782 15464 14.35 7.96e-10 ***
## Hour:weekday 90 154283 1714 1.59 0.0314 *
## Residuals 56 60359 1078
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(drivers.online~Hour*weekday,taxi_SD))
## Df Sum Sq Mean Sq F value Pr(>F)
## Hour 15 2635319 175688 192.320 < 2e-16 ***
## weekday 6 301894 50316 55.079 < 2e-16 ***
## Hour:weekday 90 430629 4785 5.238 3.53e-10 ***
## Residuals 56 51157 914
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.lm <- lm(rides~eyeballs+drivers.online,taxi_SD)
summary(fit.lm)
##
## Call:
## lm(formula = rides ~ eyeballs + drivers.online, data = taxi_SD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.010 -6.340 0.754 5.912 32.008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.22920 3.39468 -3.602 0.000417 ***
## eyeballs 0.50188 0.02307 21.754 < 2e-16 ***
## drivers.online 0.04832 0.01737 2.781 0.006052 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.85 on 165 degrees of freedom
## Multiple R-squared: 0.9635, Adjusted R-squared: 0.9631
## F-statistic: 2181 on 2 and 165 DF, p-value: < 2.2e-16
fit.lm.2 <- lm(rides~eyeballs+drivers.online+Hour+weekday,taxi_SD)
summary(fit.lm.2)
##
## Call:
## lm(formula = rides ~ eyeballs + drivers.online + Hour + weekday,
## data = taxi_SD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.192 -6.409 -0.162 5.407 25.357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.61278 5.83248 -3.877 0.000160 ***
## eyeballs 0.56540 0.03214 17.591 < 2e-16 ***
## drivers.online 0.05359 0.02145 2.498 0.013612 *
## Hour02 10.18566 5.18527 1.964 0.051415 .
## Hour03 13.32320 4.84029 2.753 0.006675 **
## Hour05 13.42633 5.73713 2.340 0.020644 *
## Hour06 5.00578 4.58289 1.092 0.276536
## Hour08 -6.57125 4.77844 -1.375 0.171209
## Hour09 -7.50143 4.07735 -1.840 0.067860 .
## Hour11 -5.52912 5.09648 -1.085 0.279783
## Hour12 -9.31811 4.45398 -2.092 0.038186 *
## Hour14 -6.37367 5.38245 -1.184 0.238301
## Hour15 -5.65373 4.80285 -1.177 0.241073
## Hour17 -14.23145 5.77526 -2.464 0.014909 *
## Hour18 -17.80502 5.20765 -3.419 0.000818 ***
## Hour20 -17.68589 5.86232 -3.017 0.003020 **
## Hour21 -5.99741 5.06928 -1.183 0.238724
## Hour23 -4.46137 5.53512 -0.806 0.421565
## weekdayВторник -4.26756 3.03455 -1.406 0.161783
## weekdayПонедельник -10.71658 2.99234 -3.581 0.000467 ***
## weekdayПятница -3.92280 3.36272 -1.167 0.245317
## weekdayСреда 4.06617 3.28919 1.236 0.218389
## weekdayСуббота -1.65966 3.16712 -0.524 0.601064
## weekdayЧетверг -2.14187 3.17798 -0.674 0.501410
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.27 on 144 degrees of freedom
## Multiple R-squared: 0.9761, Adjusted R-squared: 0.9723
## F-statistic: 255.6 on 23 and 144 DF, p-value: < 2.2e-16
fit.lm.3 <- lm(rides~eyeballs+drivers.online+Hour*weekday,taxi_SD)
summary(fit.lm.3)
##
## Call:
## lm(formula = rides ~ eyeballs + drivers.online + Hour * weekday,
## data = taxi_SD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.445 -2.589 0.000 2.589 15.445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.38505 21.54157 0.436 0.664812
## eyeballs 0.64897 0.05698 11.389 5.6e-16 ***
## drivers.online -0.06900 0.06189 -1.115 0.269859
## Hour02 1.48430 12.36797 0.120 0.904920
## Hour03 9.30613 11.84258 0.786 0.435408
## Hour05 7.71956 14.85586 0.520 0.605445
## Hour06 -2.45610 13.63505 -0.180 0.857724
## Hour08 -3.28934 14.46785 -0.227 0.821006
## Hour09 -6.91544 11.63934 -0.594 0.554897
## Hour11 -21.36539 12.95769 -1.649 0.104983
## Hour12 -3.44758 10.04753 -0.343 0.732835
## Hour14 -6.35319 12.11421 -0.524 0.602118
## Hour15 -7.19912 9.88890 -0.728 0.469758
## Hour17 5.25002 12.44034 0.422 0.674689
## Hour18 1.17486 9.84244 0.119 0.905428
## Hour20 -26.32177 12.07034 -2.181 0.033581 *
## Hour21 -21.90375 9.86017 -2.221 0.030534 *
## Hour23 -39.09541 12.71805 -3.074 0.003309 **
## weekdayВторник -23.60394 13.14986 -1.795 0.078252 .
## weekdayПонедельник -17.25485 13.39983 -1.288 0.203345
## weekdayПятница -2.56929 10.65231 -0.241 0.810317
## weekdayСреда -12.64899 12.07748 -1.047 0.299617
## weekdayСуббота 10.75283 10.12966 1.062 0.293178
## weekdayЧетверг -6.00696 10.98786 -0.547 0.586843
## Hour02:weekdayВторник 13.30785 16.65071 0.799 0.427655
## Hour03:weekdayВторник 10.78418 13.84313 0.779 0.439364
## Hour05:weekdayВторник 11.00818 17.69721 0.622 0.536540
## Hour06:weekdayВторник 21.22006 17.41525 1.218 0.228340
## Hour08:weekdayВторник 13.40440 22.26473 0.602 0.549663
## Hour09:weekdayВторник 21.23349 20.62982 1.029 0.307945
## Hour11:weekdayВторник 39.63983 23.03400 1.721 0.090990 .
## Hour12:weekdayВторник 19.79404 19.60717 1.010 0.317223
## Hour14:weekdayВторник 29.28263 20.48287 1.430 0.158588
## Hour15:weekdayВторник 26.52270 18.54432 1.430 0.158411
## Hour17:weekdayВторник 15.29325 20.93333 0.731 0.468200
## Hour18:weekdayВторник 8.70207 17.90013 0.486 0.628831
## Hour20:weekdayВторник 41.52176 20.63336 2.012 0.049181 *
## Hour21:weekdayВторник 35.44198 16.68026 2.125 0.038198 *
## Hour23:weekdayВторник 68.03099 19.04544 3.572 0.000754 ***
## Hour02:weekdayПонедельник -1.53486 16.64007 -0.092 0.926850
## Hour03:weekdayПонедельник -0.70858 13.86367 -0.051 0.959426
## Hour05:weekdayПонедельник 7.24825 17.91848 0.405 0.687434
## Hour06:weekdayПонедельник 3.54477 16.98429 0.209 0.835461
## Hour08:weekdayПонедельник 15.00797 21.38532 0.702 0.485825
## Hour09:weekdayПонедельник 21.32006 20.39748 1.045 0.300573
## Hour11:weekdayПонедельник 24.89984 22.24542 1.119 0.267956
## Hour12:weekdayПонедельник -4.36018 17.77082 -0.245 0.807111
## Hour14:weekdayПонедельник -3.70970 19.67901 -0.189 0.851183
## Hour15:weekdayПонедельник 22.06838 17.47880 1.263 0.212166
## Hour17:weekdayПонедельник -8.21005 20.15112 -0.407 0.685307
## Hour18:weekdayПонедельник -32.51850 16.26196 -2.000 0.050578 .
## Hour20:weekdayПонедельник 24.23434 19.29934 1.256 0.214628
## Hour21:weekdayПонедельник 12.45180 15.57012 0.800 0.427375
## Hour23:weekdayПонедельник 29.37908 18.63624 1.576 0.120763
## Hour02:weekdayПятница -0.51320 16.68594 -0.031 0.975577
## Hour03:weekdayПятница -7.38768 13.59390 -0.543 0.589053
## Hour05:weekdayПятница -2.47733 16.89794 -0.147 0.883989
## Hour06:weekdayПятница 2.36796 15.64521 0.151 0.880261
## Hour08:weekdayПятница -9.64732 20.41822 -0.472 0.638484
## Hour09:weekdayПятница -2.36162 19.57564 -0.121 0.904423
## Hour11:weekdayПятница 36.30393 21.56372 1.684 0.098039 .
## Hour12:weekdayПятница 9.63358 18.42579 0.523 0.603230
## Hour14:weekdayПятница 14.40765 21.20848 0.679 0.499828
## Hour15:weekdayПятница 15.21197 18.20319 0.836 0.407020
## Hour17:weekdayПятница -23.65605 20.84530 -1.135 0.261455
## Hour18:weekdayПятница -5.46916 17.95460 -0.305 0.761834
## Hour20:weekdayПятница 37.00197 22.89709 1.616 0.111920
## Hour21:weekdayПятница 42.13300 20.46109 2.059 0.044313 *
## Hour23:weekdayПятница 34.91086 23.23360 1.503 0.138768
## Hour02:weekdayСреда 8.71869 16.64982 0.524 0.602664
## Hour03:weekdayСреда 5.86380 14.17654 0.414 0.680785
## Hour05:weekdayСреда 5.82457 17.64040 0.330 0.742539
## Hour06:weekdayСреда 25.37758 17.41403 1.457 0.150823
## Hour08:weekdayСреда 14.84997 22.46531 0.661 0.511411
## Hour09:weekdayСреда 17.24590 20.71477 0.833 0.408771
## Hour11:weekdayСреда 60.84655 23.41818 2.598 0.012053 *
## Hour12:weekdayСреда 30.47752 20.36167 1.497 0.140266
## Hour14:weekdayСреда 30.58834 22.94877 1.333 0.188162
## Hour15:weekdayСреда 35.12090 19.74220 1.779 0.080872 .
## Hour17:weekdayСреда 2.19771 21.57071 0.102 0.919226
## Hour18:weekdayСреда 19.00018 19.70162 0.964 0.339146
## Hour20:weekdayСреда 56.69162 22.01444 2.575 0.012790 *
## Hour21:weekdayСреда 37.88181 17.47478 2.168 0.034601 *
## Hour23:weekdayСреда 62.11980 20.07158 3.095 0.003117 **
## Hour02:weekdayСуббота 11.60694 16.70285 0.695 0.490092
## Hour03:weekdayСуббота -13.55014 13.77600 -0.984 0.329697
## Hour05:weekdayСуббота -10.19309 16.84307 -0.605 0.547594
## Hour06:weekdayСуббота -10.15156 13.64689 -0.744 0.460178
## Hour08:weekdayСуббота -18.67337 16.68344 -1.119 0.267977
## Hour09:weekdayСуббота -16.88287 13.58991 -1.242 0.219491
## Hour11:weekdayСуббота -7.34141 16.69694 -0.440 0.661919
## Hour12:weekdayСуббота -20.90088 13.60845 -1.536 0.130408
## Hour14:weekdayСуббота -4.28590 16.65187 -0.257 0.797862
## Hour15:weekdayСуббота 0.75886 13.66134 0.056 0.955907
## Hour17:weekdayСуббота -16.30568 16.80311 -0.970 0.336175
## Hour18:weekdayСуббота -23.85605 13.64368 -1.749 0.086060 .
## Hour20:weekdayСуббота 1.56650 17.04554 0.092 0.927117
## Hour21:weekdayСуббота 12.43388 14.03225 0.886 0.379497
## Hour23:weekdayСуббота 21.47397 17.07963 1.257 0.214061
## Hour02:weekdayЧетверг -1.64733 16.78168 -0.098 0.922167
## Hour03:weekdayЧетверг -5.54749 13.67365 -0.406 0.686561
## Hour05:weekdayЧетверг -4.27536 17.13012 -0.250 0.803858
## Hour06:weekdayЧетверг 8.38840 16.30007 0.515 0.608916
## Hour08:weekdayЧетверг -21.08400 20.27879 -1.040 0.303109
## Hour09:weekdayЧетверг 6.95224 18.90131 0.368 0.714447
## Hour11:weekdayЧетверг 28.83858 22.68628 1.271 0.209109
## Hour12:weekdayЧетверг 6.40075 17.91573 0.357 0.722282
## Hour14:weekdayЧетверг 28.11931 20.54526 1.369 0.176775
## Hour15:weekdayЧетверг 16.44114 17.76530 0.925 0.358843
## Hour17:weekdayЧетверг -0.59939 20.36594 -0.029 0.976629
## Hour18:weekdayЧетверг 12.33778 17.67592 0.698 0.488173
## Hour20:weekdayЧетверг 17.77602 20.48111 0.868 0.389277
## Hour21:weekdayЧетверг 34.83525 16.93431 2.057 0.044522 *
## Hour23:weekdayЧетверг 52.65392 19.22353 2.739 0.008331 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.606 on 54 degrees of freedom
## Multiple R-squared: 0.9922, Adjusted R-squared: 0.9758
## F-statistic: 60.49 on 113 and 54 DF, p-value: < 2.2e-16
anova(fit.lm,fit.lm.2,fit.lm.3)
hist(fit.lm.2$residuals,col = "blue")

shapiro.test(fit.lm.2$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit.lm.2$residuals
## W = 0.9936, p-value = 0.6731
par(mfrow=c(2,3))
plot(fit.lm.2,which = c(1:6))

ggplot(taxi_SD, aes(eyeballs, rides,size=drivers.online,col=weekday)) + geom_point() + geom_smooth(method = lm,se=F)

taxi_SD %>% group_by(weekday) %>% do(tidy(lm(rides~eyeballs+drivers.online,data =.)))
ggplot(taxi_SD, aes(eyeballs, rides)) + geom_point() + geom_smooth(method = loess,se=T) + facet_wrap(~weekday)

ggplot(taxi_SD, aes(drivers.online, rides)) + geom_point() + geom_smooth(method = loess,se=T) + facet_wrap(~weekday)

taxi3d <- taxi_SD[,2:4]
s3d <- scatterplot3d(taxi3d,grid = T,pch=16, highlight.3d=TRUE,type="h")
fit.3d <- lm(rides~.,data = taxi3d)
s3d$plane3d(fit.3d,draw_polygon = T)
