library(rJava)
library(xlsx)
## Loading required package: xlsxjars
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(tidyr)
library(broom)
library(scatterplot3d)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
taxi_SD<-read.xlsx(file = "taxi_supply_demand.xlsx",sheetIndex = 1)
head(taxi_SD)
str(taxi_SD)
## 'data.frame':    168 obs. of  4 variables:
##  $ period        : POSIXct, format: "2017-10-09 00:00:00" "2017-10-09 00:59:59" ...
##  $ drivers.online: num  281 220 164 150 161 190 222 294 361 407 ...
##  $ eyeballs      : num  180 104 62 51 43 37 59 120 185 202 ...
##  $ rides         : num  84 50 21 21 20 18 24 43 99 104 ...
cor(taxi_SD[2:4])
##                drivers.online  eyeballs     rides
## drivers.online      1.0000000 0.9294869 0.9268260
## eyeballs            0.9294869 1.0000000 0.9807348
## rides               0.9268260 0.9807348 1.0000000
taxi_SD$weekday <- weekdays(taxi_SD$period)
taxi_SD$Hour <- format(taxi_SD$period,"%H")
ggpairs(taxi_SD, aes(colour = weekday, alpha = 0.4),columns = c(2:4))

summary(aov(rides~weekday,taxi_SD))
##              Df Sum Sq Mean Sq F value Pr(>F)  
## weekday       6  46121    7687   2.099 0.0561 .
## Residuals   161 589508    3662                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=weekday, y=rides, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(eyeballs~weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value Pr(>F)
## weekday       6   92782   15464   1.348  0.239
## Residuals   161 1846258   11467
ggplot(taxi_SD, aes(x=weekday, y=eyeballs, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(drivers.online~weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value Pr(>F)  
## weekday       6  301894   50316   2.599 0.0197 *
## Residuals   161 3117105   19361                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=weekday, y=drivers.online, color=weekday)) + geom_boxplot() + geom_jitter()

summary(aov(rides~Hour,taxi_SD))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Hour         15 490205   32680   34.16 <2e-16 ***
## Residuals   152 145424     957                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=rides, color=Hour)) + geom_boxplot()

summary(aov(eyeballs~Hour,taxi_SD))
##              Df  Sum Sq Mean Sq F value Pr(>F)    
## Hour         15 1631616  108774   53.78 <2e-16 ***
## Residuals   152  307424    2023                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=eyeballs, color=Hour)) + geom_boxplot()

summary(aov(drivers.online~Hour,taxi_SD))
##              Df  Sum Sq Mean Sq F value Pr(>F)    
## Hour         15 2635319  175688   34.08 <2e-16 ***
## Residuals   152  783680    5156                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=drivers.online, color=Hour)) + geom_boxplot()

summary(aov(rides~Hour+weekday,taxi_SD))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Hour         15 490205   32680   48.05  < 2e-16 ***
## weekday       6  46121    7687   11.30 2.38e-10 ***
## Residuals   146  99302     680                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=rides, color=weekday)) + geom_boxplot()

summary(aov(eyeballs~Hour+weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value  Pr(>F)    
## Hour         15 1631616  108774   73.99 < 2e-16 ***
## weekday       6   92782   15464   10.52 1.1e-09 ***
## Residuals   146  214642    1470                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=eyeballs, color=weekday)) + geom_boxplot()

summary(aov(drivers.online~Hour+weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Hour         15 2635319  175688   53.24  < 2e-16 ***
## weekday       6  301894   50316   15.25 1.62e-13 ***
## Residuals   146  481786    3300                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(taxi_SD, aes(x=Hour, y=drivers.online, color=weekday)) + geom_boxplot()

summary(aov(rides~Hour*weekday,taxi_SD))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Hour         15 490205   32680  67.714  < 2e-16 ***
## weekday       6  46121    7687  15.927 1.41e-10 ***
## Hour:weekday 90  72275     803   1.664   0.0207 *  
## Residuals    56  27027     483                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(eyeballs~Hour*weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Hour         15 1631616  108774  100.92  < 2e-16 ***
## weekday       6   92782   15464   14.35 7.96e-10 ***
## Hour:weekday 90  154283    1714    1.59   0.0314 *  
## Residuals    56   60359    1078                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(drivers.online~Hour*weekday,taxi_SD))
##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## Hour         15 2635319  175688 192.320  < 2e-16 ***
## weekday       6  301894   50316  55.079  < 2e-16 ***
## Hour:weekday 90  430629    4785   5.238 3.53e-10 ***
## Residuals    56   51157     914                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.lm <- lm(rides~eyeballs+drivers.online,taxi_SD)
summary(fit.lm)
## 
## Call:
## lm(formula = rides ~ eyeballs + drivers.online, data = taxi_SD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.010  -6.340   0.754   5.912  32.008 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -12.22920    3.39468  -3.602 0.000417 ***
## eyeballs         0.50188    0.02307  21.754  < 2e-16 ***
## drivers.online   0.04832    0.01737   2.781 0.006052 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.85 on 165 degrees of freedom
## Multiple R-squared:  0.9635, Adjusted R-squared:  0.9631 
## F-statistic:  2181 on 2 and 165 DF,  p-value: < 2.2e-16
fit.lm.2 <- lm(rides~eyeballs+drivers.online+Hour+weekday,taxi_SD)
summary(fit.lm.2)
## 
## Call:
## lm(formula = rides ~ eyeballs + drivers.online + Hour + weekday, 
##     data = taxi_SD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.192  -6.409  -0.162   5.407  25.357 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -22.61278    5.83248  -3.877 0.000160 ***
## eyeballs             0.56540    0.03214  17.591  < 2e-16 ***
## drivers.online       0.05359    0.02145   2.498 0.013612 *  
## Hour02              10.18566    5.18527   1.964 0.051415 .  
## Hour03              13.32320    4.84029   2.753 0.006675 ** 
## Hour05              13.42633    5.73713   2.340 0.020644 *  
## Hour06               5.00578    4.58289   1.092 0.276536    
## Hour08              -6.57125    4.77844  -1.375 0.171209    
## Hour09              -7.50143    4.07735  -1.840 0.067860 .  
## Hour11              -5.52912    5.09648  -1.085 0.279783    
## Hour12              -9.31811    4.45398  -2.092 0.038186 *  
## Hour14              -6.37367    5.38245  -1.184 0.238301    
## Hour15              -5.65373    4.80285  -1.177 0.241073    
## Hour17             -14.23145    5.77526  -2.464 0.014909 *  
## Hour18             -17.80502    5.20765  -3.419 0.000818 ***
## Hour20             -17.68589    5.86232  -3.017 0.003020 ** 
## Hour21              -5.99741    5.06928  -1.183 0.238724    
## Hour23              -4.46137    5.53512  -0.806 0.421565    
## weekdayВторник      -4.26756    3.03455  -1.406 0.161783    
## weekdayПонедельник -10.71658    2.99234  -3.581 0.000467 ***
## weekdayПятница      -3.92280    3.36272  -1.167 0.245317    
## weekdayСреда         4.06617    3.28919   1.236 0.218389    
## weekdayСуббота      -1.65966    3.16712  -0.524 0.601064    
## weekdayЧетверг      -2.14187    3.17798  -0.674 0.501410    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.27 on 144 degrees of freedom
## Multiple R-squared:  0.9761, Adjusted R-squared:  0.9723 
## F-statistic: 255.6 on 23 and 144 DF,  p-value: < 2.2e-16
fit.lm.3 <- lm(rides~eyeballs+drivers.online+Hour*weekday,taxi_SD)
summary(fit.lm.3)
## 
## Call:
## lm(formula = rides ~ eyeballs + drivers.online + Hour * weekday, 
##     data = taxi_SD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.445  -2.589   0.000   2.589  15.445 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 9.38505   21.54157   0.436 0.664812    
## eyeballs                    0.64897    0.05698  11.389  5.6e-16 ***
## drivers.online             -0.06900    0.06189  -1.115 0.269859    
## Hour02                      1.48430   12.36797   0.120 0.904920    
## Hour03                      9.30613   11.84258   0.786 0.435408    
## Hour05                      7.71956   14.85586   0.520 0.605445    
## Hour06                     -2.45610   13.63505  -0.180 0.857724    
## Hour08                     -3.28934   14.46785  -0.227 0.821006    
## Hour09                     -6.91544   11.63934  -0.594 0.554897    
## Hour11                    -21.36539   12.95769  -1.649 0.104983    
## Hour12                     -3.44758   10.04753  -0.343 0.732835    
## Hour14                     -6.35319   12.11421  -0.524 0.602118    
## Hour15                     -7.19912    9.88890  -0.728 0.469758    
## Hour17                      5.25002   12.44034   0.422 0.674689    
## Hour18                      1.17486    9.84244   0.119 0.905428    
## Hour20                    -26.32177   12.07034  -2.181 0.033581 *  
## Hour21                    -21.90375    9.86017  -2.221 0.030534 *  
## Hour23                    -39.09541   12.71805  -3.074 0.003309 ** 
## weekdayВторник            -23.60394   13.14986  -1.795 0.078252 .  
## weekdayПонедельник        -17.25485   13.39983  -1.288 0.203345    
## weekdayПятница             -2.56929   10.65231  -0.241 0.810317    
## weekdayСреда              -12.64899   12.07748  -1.047 0.299617    
## weekdayСуббота             10.75283   10.12966   1.062 0.293178    
## weekdayЧетверг             -6.00696   10.98786  -0.547 0.586843    
## Hour02:weekdayВторник      13.30785   16.65071   0.799 0.427655    
## Hour03:weekdayВторник      10.78418   13.84313   0.779 0.439364    
## Hour05:weekdayВторник      11.00818   17.69721   0.622 0.536540    
## Hour06:weekdayВторник      21.22006   17.41525   1.218 0.228340    
## Hour08:weekdayВторник      13.40440   22.26473   0.602 0.549663    
## Hour09:weekdayВторник      21.23349   20.62982   1.029 0.307945    
## Hour11:weekdayВторник      39.63983   23.03400   1.721 0.090990 .  
## Hour12:weekdayВторник      19.79404   19.60717   1.010 0.317223    
## Hour14:weekdayВторник      29.28263   20.48287   1.430 0.158588    
## Hour15:weekdayВторник      26.52270   18.54432   1.430 0.158411    
## Hour17:weekdayВторник      15.29325   20.93333   0.731 0.468200    
## Hour18:weekdayВторник       8.70207   17.90013   0.486 0.628831    
## Hour20:weekdayВторник      41.52176   20.63336   2.012 0.049181 *  
## Hour21:weekdayВторник      35.44198   16.68026   2.125 0.038198 *  
## Hour23:weekdayВторник      68.03099   19.04544   3.572 0.000754 ***
## Hour02:weekdayПонедельник  -1.53486   16.64007  -0.092 0.926850    
## Hour03:weekdayПонедельник  -0.70858   13.86367  -0.051 0.959426    
## Hour05:weekdayПонедельник   7.24825   17.91848   0.405 0.687434    
## Hour06:weekdayПонедельник   3.54477   16.98429   0.209 0.835461    
## Hour08:weekdayПонедельник  15.00797   21.38532   0.702 0.485825    
## Hour09:weekdayПонедельник  21.32006   20.39748   1.045 0.300573    
## Hour11:weekdayПонедельник  24.89984   22.24542   1.119 0.267956    
## Hour12:weekdayПонедельник  -4.36018   17.77082  -0.245 0.807111    
## Hour14:weekdayПонедельник  -3.70970   19.67901  -0.189 0.851183    
## Hour15:weekdayПонедельник  22.06838   17.47880   1.263 0.212166    
## Hour17:weekdayПонедельник  -8.21005   20.15112  -0.407 0.685307    
## Hour18:weekdayПонедельник -32.51850   16.26196  -2.000 0.050578 .  
## Hour20:weekdayПонедельник  24.23434   19.29934   1.256 0.214628    
## Hour21:weekdayПонедельник  12.45180   15.57012   0.800 0.427375    
## Hour23:weekdayПонедельник  29.37908   18.63624   1.576 0.120763    
## Hour02:weekdayПятница      -0.51320   16.68594  -0.031 0.975577    
## Hour03:weekdayПятница      -7.38768   13.59390  -0.543 0.589053    
## Hour05:weekdayПятница      -2.47733   16.89794  -0.147 0.883989    
## Hour06:weekdayПятница       2.36796   15.64521   0.151 0.880261    
## Hour08:weekdayПятница      -9.64732   20.41822  -0.472 0.638484    
## Hour09:weekdayПятница      -2.36162   19.57564  -0.121 0.904423    
## Hour11:weekdayПятница      36.30393   21.56372   1.684 0.098039 .  
## Hour12:weekdayПятница       9.63358   18.42579   0.523 0.603230    
## Hour14:weekdayПятница      14.40765   21.20848   0.679 0.499828    
## Hour15:weekdayПятница      15.21197   18.20319   0.836 0.407020    
## Hour17:weekdayПятница     -23.65605   20.84530  -1.135 0.261455    
## Hour18:weekdayПятница      -5.46916   17.95460  -0.305 0.761834    
## Hour20:weekdayПятница      37.00197   22.89709   1.616 0.111920    
## Hour21:weekdayПятница      42.13300   20.46109   2.059 0.044313 *  
## Hour23:weekdayПятница      34.91086   23.23360   1.503 0.138768    
## Hour02:weekdayСреда         8.71869   16.64982   0.524 0.602664    
## Hour03:weekdayСреда         5.86380   14.17654   0.414 0.680785    
## Hour05:weekdayСреда         5.82457   17.64040   0.330 0.742539    
## Hour06:weekdayСреда        25.37758   17.41403   1.457 0.150823    
## Hour08:weekdayСреда        14.84997   22.46531   0.661 0.511411    
## Hour09:weekdayСреда        17.24590   20.71477   0.833 0.408771    
## Hour11:weekdayСреда        60.84655   23.41818   2.598 0.012053 *  
## Hour12:weekdayСреда        30.47752   20.36167   1.497 0.140266    
## Hour14:weekdayСреда        30.58834   22.94877   1.333 0.188162    
## Hour15:weekdayСреда        35.12090   19.74220   1.779 0.080872 .  
## Hour17:weekdayСреда         2.19771   21.57071   0.102 0.919226    
## Hour18:weekdayСреда        19.00018   19.70162   0.964 0.339146    
## Hour20:weekdayСреда        56.69162   22.01444   2.575 0.012790 *  
## Hour21:weekdayСреда        37.88181   17.47478   2.168 0.034601 *  
## Hour23:weekdayСреда        62.11980   20.07158   3.095 0.003117 ** 
## Hour02:weekdayСуббота      11.60694   16.70285   0.695 0.490092    
## Hour03:weekdayСуббота     -13.55014   13.77600  -0.984 0.329697    
## Hour05:weekdayСуббота     -10.19309   16.84307  -0.605 0.547594    
## Hour06:weekdayСуббота     -10.15156   13.64689  -0.744 0.460178    
## Hour08:weekdayСуббота     -18.67337   16.68344  -1.119 0.267977    
## Hour09:weekdayСуббота     -16.88287   13.58991  -1.242 0.219491    
## Hour11:weekdayСуббота      -7.34141   16.69694  -0.440 0.661919    
## Hour12:weekdayСуббота     -20.90088   13.60845  -1.536 0.130408    
## Hour14:weekdayСуббота      -4.28590   16.65187  -0.257 0.797862    
## Hour15:weekdayСуббота       0.75886   13.66134   0.056 0.955907    
## Hour17:weekdayСуббота     -16.30568   16.80311  -0.970 0.336175    
## Hour18:weekdayСуббота     -23.85605   13.64368  -1.749 0.086060 .  
## Hour20:weekdayСуббота       1.56650   17.04554   0.092 0.927117    
## Hour21:weekdayСуббота      12.43388   14.03225   0.886 0.379497    
## Hour23:weekdayСуббота      21.47397   17.07963   1.257 0.214061    
## Hour02:weekdayЧетверг      -1.64733   16.78168  -0.098 0.922167    
## Hour03:weekdayЧетверг      -5.54749   13.67365  -0.406 0.686561    
## Hour05:weekdayЧетверг      -4.27536   17.13012  -0.250 0.803858    
## Hour06:weekdayЧетверг       8.38840   16.30007   0.515 0.608916    
## Hour08:weekdayЧетверг     -21.08400   20.27879  -1.040 0.303109    
## Hour09:weekdayЧетверг       6.95224   18.90131   0.368 0.714447    
## Hour11:weekdayЧетверг      28.83858   22.68628   1.271 0.209109    
## Hour12:weekdayЧетверг       6.40075   17.91573   0.357 0.722282    
## Hour14:weekdayЧетверг      28.11931   20.54526   1.369 0.176775    
## Hour15:weekdayЧетверг      16.44114   17.76530   0.925 0.358843    
## Hour17:weekdayЧетверг      -0.59939   20.36594  -0.029 0.976629    
## Hour18:weekdayЧетверг      12.33778   17.67592   0.698 0.488173    
## Hour20:weekdayЧетверг      17.77602   20.48111   0.868 0.389277    
## Hour21:weekdayЧетверг      34.83525   16.93431   2.057 0.044522 *  
## Hour23:weekdayЧетверг      52.65392   19.22353   2.739 0.008331 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.606 on 54 degrees of freedom
## Multiple R-squared:  0.9922, Adjusted R-squared:  0.9758 
## F-statistic: 60.49 on 113 and 54 DF,  p-value: < 2.2e-16
anova(fit.lm,fit.lm.2,fit.lm.3)
hist(fit.lm.2$residuals,col = "blue")

shapiro.test(fit.lm.2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit.lm.2$residuals
## W = 0.9936, p-value = 0.6731
par(mfrow=c(2,3))
plot(fit.lm.2,which = c(1:6))

ggplot(taxi_SD, aes(eyeballs, rides,size=drivers.online,col=weekday)) + geom_point() + geom_smooth(method = lm,se=F)

taxi_SD %>% group_by(weekday) %>% do(tidy(lm(rides~eyeballs+drivers.online,data =.)))
ggplot(taxi_SD, aes(eyeballs, rides)) + geom_point() + geom_smooth(method = loess,se=T) + facet_wrap(~weekday)

ggplot(taxi_SD, aes(drivers.online, rides)) + geom_point() + geom_smooth(method = loess,se=T) + facet_wrap(~weekday)

taxi3d <- taxi_SD[,2:4]
s3d <- scatterplot3d(taxi3d,grid = T,pch=16, highlight.3d=TRUE,type="h")
fit.3d <- lm(rides~.,data = taxi3d)
s3d$plane3d(fit.3d,draw_polygon = T)