Task

Uber trip data:
− Load the data [uber-data.csv]
− Discover and comment clusters of Uber data based on locations (longitude & latitude)
− Analyze the cluster centers by time
− Analyze the cluster centers by date
− Remember to choose the right algorithm, compute the optimal number of clusters and quality measures
− Develop adequate plots
− Apply the data set for forecasting

Preparation

library(DataSum)
library(caTools)
library(tidyverse)
library(factoextra)
library(stats)
library(flexclust)
library(mclust)
library(fpc)
library(clustertend)
library(cluster)
library(ClusterR)
library(hms)
library(ggmap)
library(tidyr)
library(ggmap)
library(ggplot2)

Load the data set and check the column types that were detected.

# Locate the input file without changing the session's working directory.
# The folder defaults to the original location and can be overridden on other
# machines through the UBER_DATA_DIR environment variable.
# NOTE(review): the previous version called setwd(); nothing in the visible
# part of this script relies on the working directory - confirm for the rest.
data_dir <- Sys.getenv("UBER_DATA_DIR")
if (!nzchar(data_dir)) {
  data_dir <- "C:/Users/ydmar/Documents/UW/UW - 1 semester/UL"
}
uber_file <- file.path(data_dir, "uber-data.csv")

# Fail early with a readable message instead of a low-level reader error
if (!file.exists(uber_file)) {
  stop("Input file not found: ", uber_file, call. = FALSE)
}

uber_data <- read_csv(uber_file)
# Show the column specification that was used to parse the file
spec(uber_data)
## cols(
##   `Date/Time` = col_character(),
##   Lat = col_double(),
##   Lon = col_double(),
##   Base = col_character()
## )
# Named character vector with the class of every column
unlist(Map(class, uber_data))
##   Date/Time         Lat         Lon        Base 
## "character"   "numeric"   "numeric" "character"

Check NA values in the data set.

# Keep only the rows that have a missing value in at least one of the four
# original columns
na_rows <- uber_data[
  !complete.cases(uber_data[, c("Date/Time", "Lat", "Lon", "Base")]),
]

# Report the result: either a confirmation message or the offending rows
if (nrow(na_rows) == 0) {
  print("There are no NA values")
} else {
  print(na_rows)
}
## [1] "There are no NA values"

Since the data set has no NA values, we can continue the analysis with the complete data.

Calculate the mode of latitude and longitude values

# Most frequent latitude among all pickups
Lat_mode <- getmode(uber_data[["Lat"]])
print(Lat_mode)
## [1] 40.774
# Most frequent longitude among all pickups
Lon_mode <- getmode(uber_data[["Lon"]])
print(Lon_mode)
## [1] -73.9888

Split date and time for further analysis

# Split the raw "Date/Time" string into separate Date and Time columns.
# The split is done on a copy (`dt`) so that the new columns are appended at
# the end and Lat/Lon stay in columns 2-3, which the positional subset
# `uber_data[2:3]` further down relies on.
uber_data$dt <- uber_data$`Date/Time`
uber_data <- separate(
  data = uber_data,
  col = dt,
  into = c("Date", "Time"),
  sep = "\\s"
)

# Parse the full timestamp with an explicit time zone. Without `tz` the result
# depends on the machine's local zone and can yield NA for wall-clock times
# that do not exist there (DST gaps).
# NOTE(review): the coordinates are around 40.7N / 74W, so the timestamps are
# presumably New York local time - confirm against the data source.
uber_data$`Date/Time` <- as.POSIXct(
  uber_data$`Date/Time`,
  format = "%m/%d/%Y %H:%M:%S",
  tz = "America/New_York"
)
uber_data$Date <- as.Date(uber_data$Date, format = "%m/%d/%Y")
uber_data$Time <- as_hms(uber_data$Time)

Another look at the data set’s structure

# Compact overview of the column types after the conversions
str(object = uber_data)
## tibble [1,028,136 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Date/Time: POSIXct[1:1028136], format: "2014-09-01 00:01:00" "2014-09-01 00:01:00" ...
##  $ Lat      : num [1:1028136] 40.2 40.8 40.8 40.7 40.8 ...
##  $ Lon      : num [1:1028136] -74 -74 -74 -74 -73.9 ...
##  $ Base     : chr [1:1028136] "B02512" "B02512" "B02512" "B02512" ...
##  $ Date     : Date[1:1028136], format: "2014-09-01" "2014-09-01" ...
##  $ Time     : 'hms' num [1:1028136] 00:01:00 00:01:00 00:03:00 00:06:00 ...
##   ..- attr(*, "units")= chr "secs"
# First five rows of the prepared data
head(uber_data, n = 5)
## # A tibble: 5 × 6
##   `Date/Time`           Lat   Lon Base   Date       Time  
##   <dttm>              <dbl> <dbl> <chr>  <date>     <time>
## 1 2014-09-01 00:01:00  40.2 -74.0 B02512 2014-09-01 01'00"
## 2 2014-09-01 00:01:00  40.8 -74.0 B02512 2014-09-01 01'00"
## 3 2014-09-01 00:03:00  40.8 -74.0 B02512 2014-09-01 03'00"
## 4 2014-09-01 00:06:00  40.7 -74.0 B02512 2014-09-01 06'00"
## 5 2014-09-01 00:11:00  40.8 -73.9 B02512 2014-09-01 11'00"

Analysis of the data set

We apply the CLARA clustering method because the data set is quite large (over 1 million rows).

The clusters are going to be created by geographical variables (longitude and latitude).

Plot the data set:

# Scatter plot of every pickup location (latitude on x, longitude on y, the
# same orientation as the cluster plots further down).
# The transparency is a constant, so it is set on the layer instead of inside
# aes(): inside aes() the value is treated as data and rescaled by the alpha
# scale, so the requested transparency is never applied.
# NOTE(review): 0.001 is presumably below what an 8-bit alpha channel can
# render; 0.05 is a starting value that keeps dense areas readable - tune it.
ggplot(uber_data, aes(x = Lat, y = Lon)) +
  geom_point(alpha = 0.05, show.legend = FALSE) +
  theme_minimal() +
  ggtitle("All Uber pickups - Sep") +
  xlab("Latitude") +
  ylab("Longitude")

The plot suggests that there are about 3 clusters plus some outliers. That is why it is reasonable to apply the CLARA method: CLARA draws multiple samples of the data set, applies PAM to each sample, and returns the best clustering as the output. We are going to run CLARA for several numbers of clusters (from 3 up to 5-7). For the clustering computations we set the number of samples to 6 and the sample size to 1000. Let's use the Euclidean distance.

z-score standardization

# Keep only the two geographic columns. They are selected by name so the
# subset does not silently pick other columns if the column order changes.
geo <- uber_data[c("Lat", "Lon")]
str(geo)
## tibble [1,028,136 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Lat: num [1:1028136] 40.2 40.8 40.8 40.7 40.8 ...
##  $ Lon: num [1:1028136] -74 -74 -74 -74 -73.9 ...
# z-score each column (centre on the mean, divide by the standard deviation);
# scale() works column-wise and keeps the Lat/Lon column names
geo_z <- as.data.frame(scale(geo))

Start with 3 clusters and 6 samples specified

# CLARA with k = 3 on the standardized coordinates: 6 samples of 1000 rows,
# Euclidean distance, no additional standardization inside clara()
clara_clust_3 <- clara(
  x = geo_z,
  k = 3,
  metric = "euclidean",
  stand = FALSE,
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_3)
## [1] "clara"     "partition"
# Full summary: medoids, objective value, cluster sizes and the best sample
print(clara_clust_3)
## Call:     clara(x = geo_z, k = 3, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE) 
## Medoids:
##             Lat         Lon
## [1,] -1.3451686  0.20092689
## [2,] -0.2087105 -0.40955981
## [3,]  0.6191405  0.01229336
## Objective function:   0.6377632
## Clustering vector:    int [1:1028136] 1 2 3 2 3 1 3 2 1 3 1 3 1 1 2 2 3 2 ...
## Cluster sizes:            155513 446220 426403 
## Best sample:
##    [1]    2558    2887    3609    4236    6590    6699    8268    9570   10480
##   [10]   11029   11421   13335   13398   16144   16536   16865   17257   19313
##   [19]   19956   21603   22042   22074   23815   25415   26529   27690   27816
##   [28]   28490   29761   30357   32961   33118   35299   36115   37777   37809
##   [37]   38954   38970   40727   41056   41103   42766   43394   43441   44508
##   [46]   45433   46312   47881   48618   49700   49983   51238   52556   53622
##   [55]   54281   54846   55678   55693   56133   57403   58643   61043   61451
##   [64]   62502   62973   63380   64306   65781   66032   66769   67365   71727
##   [73]   74441   75272   77155   79492   81924   82551   83414   85736   86395
##   [82]   86662   87885   88372   88623   89313   91650   91713   92905   94851
##   [91]   94961   99322  100216  100232  100702  101581  101973  102130  102350
##  [100]  105629  106523  110963  111778  112641  113614  114430  115214  116124
##  [109]  116312  119120  119481  121081  121411  121709  122791  122948  128298
##  [118]  129553  130133  130416  130871  131938  132283  132800  133491  134008
##  [127]  134353  134448  135656  136550  136958  139138  139264  141178  141288
##  [136]  142449  143264  143500  144880  145680  146700  147908  148143  149351
##  [145]  149665  150685  150983  151093  152222  152285  153587  155140  155391
##  [154]  155470  155956  157744  159376  159517  160866  162137  163220  163267
##  [163]  163800  167110  167126  167502  168052  168789  169965  170232  170420
##  [172]  171221  172899  173166  173182  173888  174311  174374  175095  176586
##  [181]  177731  177982  179833  180445  181465  184226  184540  184838  185356
##  [190]  185873  189262  191929  192509  192745  194204  194408  194439  194627
##  [199]  195270  195600  196949  197733  200024  200322  200761  202063  202942
##  [208]  203554  204511  206315  206331  206409  206534  206613  206629  207868
##  [217]  208480  209154  210911  211476  211633  212323  212763  215273  216292
##  [226]  216732  217955  219132  219775  222254  223227  223399  223995  225078
##  [235]  227776  228357  228717  231902  231981  233298  233785  234397  235134
##  [244]  238821  240170  243292  244343  244751  247167  248281  249536  249865
##  [253]  251481  253160  254069  255387  256846  258603  258980  260909  262133
##  [262]  263153  264455  265757  266432  266996  267091  267342  268032  268377
##  [271]  269256  271060  271844  272158  272754  272942  273680  273805  275264
##  [280]  276111  276158  278951  286183  288913  289085  289603  289681  289948
##  [289]  290466  290497  290544  290638  291078  293180  293588  296098  297839
##  [298]  299079  299894  301667  302310  304538  307613  309135  310217  311174
##  [307]  312100  312272  312555  315818  316147  317983  319850  319960  320226
##  [316]  324289  324682  325435  326596  327851  329404  329796  330518  332573
##  [325]  333969  334110  334675  335302  335334  337907  341468  341625  342048
##  [334]  342503  342519  343617  345845  345892  346221  346378  349375  351038
##  [343]  351069  351257  351618  352214  352230  353579  353814  354364  354662
##  [352]  356450  356701  357423  358693  359572  359996  361141  361878  362961
##  [361]  364498  365706  365784  367542  368263  368467  368953  369393  369518
##  [370]  371903  374303  374413  376750  379041  379292  379480  380625  381096
##  [379]  383967  384987  385128  385253  387199  387701  388532  390415  391764
##  [388]  392752  396392  396674  398494  399655  401632  401946  404911  406166
##  [397]  408111  411406  413116  413476  413492  414841  415610  415673  418889
##  [406]  419783  421744  422387  423470  427690  428474  428788  429384  429572
##  [415]  431878  431894  432381  432741  434404  434671  436052  436930  438640
##  [424]  440852  443174  443833  445543  445810  446061  446233  446578  446751
##  [433]  447174  447269  447833  450218  452399  452524  452728  453010  455709
##  [442]  456760  457670  458140  458940  461168  464243  466847  468400  468651
##  [451]  468730  469216  472777  473530  474723  476480  476527  476558  476856
##  [460]  477060  478849  480229  480386  482049  482065  483226  483492  483634
##  [469]  484481  485453  486159  487571  487634  487854  488246  490034  490599
##  [478]  490991  493093  494725  496702  497486  498098  498255  498616  499133
##  [487]  502522  504138  505581  505769  507479  507668  507699  507887  508123
##  [496]  508531  509660  509723  510209  512719  512829  514021  515182  515324
##  [505]  516814  517771  519575  519669  519889  520705  521238  521740  522415
##  [514]  524893  524940  526148  526227  527670  527858  528533  529553  530337
##  [523]  530619  531043  531812  532533  535169  535420  537256  537726  538903
##  [532]  541664  541978  547045  548394  549947  551642  555234  557603  557760
##  [541]  558011  558199  562796  563125  563847  563972  564067  565918  566420
##  [550]  566561  567330  567816  568036  568647  570106  571644  572240  572303
##  [559]  573730  574170  575393  576413  577213  579017  579692  580351  580665
##  [568]  582516  583661  585104  585214  585418  586202  586736  588524  589371
##  [577]  589418  590736  591034  591834  592101  592211  595270  596259  597608
##  [586]  598973  601012  601781  602189  602863  603208  603804  604463  604605
##  [595]  606848  607303  609358  609640  610597  610801  611099  611507  611868
##  [604]  612339  612825  615571  619398  621626  621893  624434  624528  624779
##  [613]  625360  625470  625815  630161  631117  631243  631353  633486  636389
##  [622]  637816  638695  640170  640264  640797  641111  642083  644876  646131
##  [631]  646523  646664  647119  647229  647370  647904  647935  649959  651167
##  [640]  654728  654885  655277  655309  655764  655779  659105  659152  660376
##  [649]  663153  664110  664878  666573  666839  667075  667922  669349  669710
##  [658]  670683  670887  673256  677397  678966  679045  679327  679955  681523
##  [667]  681727  682778  686544  686842  687673  689116  690011  690183  692599
##  [676]  692740  692772  694356  694733  695345  697227  698388  698514  699486
##  [685]  699533  699957  700459  701055  703659  703816  705024  707597  707911
##  [694]  709056  709652  711252  711754  711864  712100  714139  714861  715206
##  [703]  718485  719112  719316  723191  724446  724666  725356  726376  728274
##  [712]  728933  731741  734832  735004  735647  736479  736730  738691  740291
##  [721]  741405  742566  742691  743366  745139  745154  747664  750175  750190
##  [730]  751900  754112  755603  755932  756434  757093  757642  758270  759070
##  [739]  759383  759493  759838  760435  761093  764859  765988  766271  767431
##  [748]  768498  769722  770553  770930  772279  776327  777378  777848  778256
##  [757]  779182  781645  786791  787983  789818  790116  792109  793489  794446
##  [766]  795325  796800  796894  797427  798714  800612  801114  801506  801537
##  [775]  802761  803295  803498  804189  806589  809836  809962  811515  814198
##  [784]  815578  816849  817006  817398  820740  821383  822920  822983  825838
##  [793]  825980  826089  826654  827517  828443  828490  833965  834498  835957
##  [802]  836585  837824  838201  839409  839487  840930  841119  843174  843597
##  [811]  843880  844303  844429  845009  845072  845292  845747  845794  846813
##  [820]  848429  848680  849229  849323  850531  850986  851363  851426  852163
##  [829]  854924  856054  856163  858140  861576  863019  863207  864227  864541
##  [838]  864902  865561  865969  867098  867161  868494  871020  871459  874252
##  [847]  875115  875742  877013  877107  877233  877327  879178  879821  881076
##  [856]  881296  881986  882378  882927  884904  885296  885563  886991  888057
##  [865]  888763  889971  890473  891462  892858  893925  895321  896921  899416
##  [874]  899996  903996  904295  905095  906805  907620  908530  909079  909519
##  [883]  910476  912233  912609  914272  914900  915041  915198  915449  917818
##  [892]  917865  920563  921410  921489  922744  922901  923858  924062  924768
##  [901]  925128  925787  926085  926352  927199  927638  931608  932831  934008
##  [910]  934478  934651  934886  935153  937130  937789  938040  938275  938730
##  [919]  939954  942652  943232  943421  944378  946778  946856  948174  949649
##  [928]  951077  953430  953524  953696  954057  955046  955344  958136  958168
##  [937]  959391  959783  959925  960380  961164  962042  963156  963219  966357
##  [946]  968537  973479  973636  977370  979331  979833  981872  982217  982610
##  [955]  982908  983253  984147  985120  985935  987630  988555  988681  992587
##  [964]  993215  999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
##  [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
##  [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
##  [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"

Plot the output

# Cluster plot with normal-distribution ellipses
fviz_cluster(
  clara_clust_3,
  geom = "point",
  ellipse.type = "norm"
)

# Same clustering with a custom palette and t-distribution ellipses
fviz_cluster(
  clara_clust_3,
  palette = c("#00AFBB", "#FC4E07", "#E7B800"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)

# Silhouette plot plus the per-cluster average silhouette width
fviz_silhouette(clara_clust_3)
##   cluster size ave.sil.width
## 1       1  159          0.01
## 2       2  387          0.56
## 3       3  454          0.27

Continue using this approach since it allows us to have more granular control over the clustering process.

4 clusters and 6 samples specified

# CLARA with k = 4 on the standardized coordinates: 6 samples of 1000 rows,
# Euclidean distance, no additional standardization inside clara()
clara_clust_4 <- clara(
  x = geo_z,
  k = 4,
  metric = "euclidean",
  stand = FALSE,
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_4)
## [1] "clara"     "partition"
# Full summary: medoids, objective value, cluster sizes and the best sample
print(clara_clust_4)
## Call:     clara(x = geo_z, k = 4, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE) 
## Medoids:
##             Lat         Lon
## [1,] -2.3101783  3.25507520
## [2,] -0.1572759 -0.41127466
## [3,]  0.6289375  0.01572306
## [4,] -1.2545458  0.07231312
## Objective function:   0.550237
## Clustering vector:    int [1:1028136] 1 2 3 2 3 4 1 2 1 3 1 3 4 1 2 2 3 2 ...
## Cluster sizes:            35174 450112 411728 131122 
## Best sample:
##    [1]    2558    2887    3609    4236    6590    6699    8268    9570    9868
##   [10]   10480   11029   11421   13335   13398   16144   16536   16865   17257
##   [19]   19313   19956   21603   22042   22074   23815   25415   26529   27690
##   [28]   27816   28490   29761   30357   32961   33118   35299   36115   37777
##   [37]   37809   38954   38970   40727   41056   41103   42766   43394   43441
##   [46]   44508   45433   46312   47881   48618   49700   49983   51238   52556
##   [55]   53622   54281   54846   55678   55693   56133   57403   58643   61043
##   [64]   61451   62502   62973   63380   64306   65781   66032   66769   67365
##   [73]   71727   74441   75272   77155   79492   81924   82551   83414   85736
##   [82]   86395   86662   87885   88372   88623   89313   91650   91713   92905
##   [91]   94851   94961   99322  100216  100232  100702  101581  101973  102130
##  [100]  102350  105629  106523  110963  111778  112641  113614  114430  115214
##  [109]  116124  116312  119120  119481  121081  121411  121709  122791  122948
##  [118]  128298  129553  130133  130416  130871  132283  132800  133491  134008
##  [127]  134353  134448  135656  136550  136958  139138  139264  141178  141288
##  [136]  142449  143264  143500  144880  145680  146700  147908  148143  149351
##  [145]  149665  150685  150983  151093  152222  152285  153587  155140  155391
##  [154]  155470  155956  157744  159376  159517  160866  162137  163220  163267
##  [163]  163800  167110  167126  167502  168052  168789  169965  170232  170420
##  [172]  171221  172899  173166  173182  173888  174311  174374  175095  176586
##  [181]  177731  177982  179833  180445  181465  184226  184540  184838  185356
##  [190]  185873  189262  191929  192509  192745  194204  194408  194439  194627
##  [199]  195270  195600  196949  197733  200024  200322  200761  202063  202942
##  [208]  203554  204511  206315  206331  206409  206534  206613  206629  207868
##  [217]  208480  209154  210911  211476  211633  212323  212763  215273  216292
##  [226]  216732  217955  219132  219775  222254  223227  223399  223995  225078
##  [235]  227776  228357  228717  231902  231981  233298  233785  234397  235134
##  [244]  238821  240170  243292  244343  244751  247167  248281  249536  249865
##  [253]  251481  253160  254069  255387  256846  258603  258980  260909  262133
##  [262]  263153  264455  265757  266432  266996  267091  267342  268032  268377
##  [271]  269256  271060  271844  271954  272158  272754  272942  273680  273805
##  [280]  275264  276111  276158  278951  286183  288913  289085  289603  289681
##  [289]  289948  290466  290497  290544  290638  291078  293180  293588  296098
##  [298]  297839  299079  299894  301667  302310  304538  307613  309135  310217
##  [307]  311174  312100  312555  315818  316147  317983  319850  319960  320226
##  [316]  324289  324682  325435  326596  327851  329404  329796  330518  332573
##  [325]  333969  334110  334675  335302  335334  337907  341468  341625  342048
##  [334]  342503  342519  343617  345845  345892  346221  346378  349375  351038
##  [343]  351069  351257  351618  352214  352230  352810  353579  353814  354364
##  [352]  354662  356450  356701  357423  358693  359572  359996  361141  361878
##  [361]  362961  364498  365706  365784  367542  368263  368467  368953  369393
##  [370]  369518  371903  374303  374413  376750  379041  379292  379480  380625
##  [379]  381096  383967  384987  385128  385253  387199  387701  388532  390415
##  [388]  391764  392752  396392  396674  398494  399655  401632  401946  404911
##  [397]  406166  408111  411406  413116  413476  413492  414841  415610  415673
##  [406]  418889  419783  421744  422387  423470  427690  428474  428788  429384
##  [415]  429572  431878  431894  432381  432741  434404  434671  436052  436930
##  [424]  438640  440852  443174  443833  445543  445810  446061  446233  446578
##  [433]  446751  447174  447269  447833  448351  450218  452399  452524  452728
##  [442]  453010  455709  456760  457670  458140  458940  461168  464243  466847
##  [451]  468400  468651  468730  469216  472777  473530  474723  476480  476527
##  [460]  476558  476856  477060  478849  480229  480386  482049  482065  483226
##  [469]  483492  483634  484481  485453  486159  487571  487634  487854  488246
##  [478]  490034  490599  490991  493093  494725  496702  497486  498098  498255
##  [487]  498616  499133  502522  504138  505581  505769  507479  507668  507699
##  [496]  507887  508123  508531  509660  509723  510209  512719  512829  514021
##  [505]  515182  515324  516814  517771  519575  519669  520705  521238  521740
##  [514]  522415  524893  524940  526148  526227  527670  527858  528533  529553
##  [523]  530337  530619  531043  531812  532533  535169  535420  537256  537726
##  [532]  538903  541664  541978  547045  548394  549947  551642  555234  557603
##  [541]  557760  558011  558199  562796  563125  563847  563972  564067  565918
##  [550]  566420  566561  567330  567816  568036  568647  570106  571644  572240
##  [559]  572303  573730  574170  575393  576413  577213  579017  579692  580351
##  [568]  580665  582516  583661  585104  585214  585418  586202  586736  588524
##  [577]  589371  589418  590736  591034  591834  592211  595270  596259  597608
##  [586]  598973  601012  601781  602189  602863  603208  603804  604463  604605
##  [595]  606848  607303  609358  609640  610597  610801  611099  611507  611868
##  [604]  612339  612825  615571  621626  621893  624434  624528  624779  625360
##  [613]  625470  625815  630161  631117  631243  631353  633486  636389  637816
##  [622]  638695  640170  640264  640797  641111  642083  644876  646131  646523
##  [631]  646664  647119  647229  647370  647904  647935  649959  651167  654728
##  [640]  654885  655277  655309  655764  655779  659105  659152  660376  664110
##  [649]  664878  666573  666839  667075  667922  669349  669710  670683  670887
##  [658]  673256  677397  678966  679045  679327  679955  681523  681727  682778
##  [667]  686544  686842  687673  689116  690011  690183  692599  692740  692772
##  [676]  694356  694733  695345  697227  698388  698514  699486  699533  699957
##  [685]  700459  701055  703659  703816  705024  707597  707911  709056  709652
##  [694]  711252  711754  711864  712100  714139  714861  715206  718485  719112
##  [703]  719316  723191  724446  724666  725356  726376  728274  728933  731741
##  [712]  734832  735004  735647  736479  736730  738691  740291  741405  742566
##  [721]  742691  743366  745139  745154  747664  750175  750190  751900  754112
##  [730]  755603  755932  756434  757093  757642  758270  759070  759383  759493
##  [739]  759838  760435  761093  764859  765988  766271  767431  768498  769722
##  [748]  770553  770930  772279  774899  776327  777378  777848  778256  779182
##  [757]  781645  786791  787983  789818  790116  792109  793489  794446  795325
##  [766]  796800  796894  797427  798714  800612  801114  801506  801537  802761
##  [775]  803295  803498  804189  806589  809836  809962  811515  814198  815578
##  [784]  816849  817006  817398  820740  821383  822920  822983  825838  825980
##  [793]  826089  826654  827517  828443  828490  833965  834498  835957  836585
##  [802]  837824  838201  839409  839487  840930  841119  843174  843597  843880
##  [811]  844303  844429  845009  845072  845292  845747  845794  846813  848429
##  [820]  848680  849229  849323  850531  850986  851363  851426  852163  854924
##  [829]  856054  856163  858140  861576  863019  863207  864227  864541  864902
##  [838]  865561  865969  867098  867161  868494  871020  871459  874252  875115
##  [847]  875742  877013  877107  877233  877327  879178  879821  881076  881296
##  [856]  881986  882378  882927  884904  885296  885563  886991  888042  888057
##  [865]  888763  889971  890473  891462  892858  893925  895321  896921  899416
##  [874]  899996  903996  904295  905095  906805  907620  908530  909079  909519
##  [883]  910476  912233  912609  914272  914900  915041  915198  915449  917818
##  [892]  917865  920563  921410  921489  922744  922901  923858  924062  924768
##  [901]  925128  925787  926085  926352  927199  927638  931608  932831  934008
##  [910]  934478  934651  934886  935153  937130  937789  938040  938275  938730
##  [919]  939954  942652  943232  943421  944378  946778  946856  948174  949649
##  [928]  951077  953430  953524  953696  954057  955046  955344  958136  958168
##  [937]  959391  959783  959925  960380  961164  962042  963156  963219  966357
##  [946]  968537  973479  973636  977370  979331  979833  981872  982217  982610
##  [955]  982908  983253  984147  985120  985935  987630  988555  988681  992587
##  [964]  993215  999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
##  [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
##  [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
##  [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"

Plot the output

# Cluster plot with normal-distribution ellipses
fviz_cluster(
  clara_clust_4,
  geom = "point",
  ellipse.type = "norm"
)

# Same clustering with a custom four-colour palette and t-distribution ellipses
fviz_cluster(
  clara_clust_4,
  palette = c("#00AFBB", "#FC4E07", "#E7B800", "#7E57C2"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)

# Silhouette plot plus the per-cluster average silhouette width
fviz_silhouette(clara_clust_4)
##   cluster size ave.sil.width
## 1       1   36          0.58
## 2       2  383          0.51
## 3       3  446          0.29
## 4       4  135          0.28

5 clusters and 6 samples specified

# CLARA with k = 5 on the standardized coordinates: 6 samples of 1000 rows,
# Euclidean distance, no additional standardization inside clara()
clara_clust_5 <- clara(
  x = geo_z,
  k = 5,
  metric = "euclidean",
  stand = FALSE,
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_5)
## [1] "clara"     "partition"
# Full summary: medoids, objective value, cluster sizes and the best sample
print(clara_clust_5)
## Call:     clara(x = geo_z, k = 5, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE) 
## Medoids:
##             Lat        Lon
## [1,] -2.3077290  3.2533604
## [2,]  0.4280980 -0.1780550
## [3,]  0.9914285  0.3072476
## [4,] -1.3745597 -0.0305779
## [5,] -0.3238258 -0.4507162
## Objective function:   0.4873376
## Clustering vector:    int [1:1028136] 1 2 2 2 3 4 1 5 4 2 1 3 4 1 5 5 3 5 ...
## Cluster sizes:            35189 328853 180843 115904 367347 
## Best sample:
##    [1]    2558    2887    3609    4236    6590    6699    8268    9570   10480
##   [10]   11029   11421   13335   13398   16144   16536   16865   17257   19313
##   [19]   19956   21603   22042   22074   23815   25415   26529   27690   27816
##   [28]   28490   29761   30357   32961   33118   35299   36115   37777   37809
##   [37]   38954   38970   40727   41056   41103   42766   43394   43441   44508
##   [46]   45433   46312   47881   48618   49700   49983   51238   52556   53622
##   [55]   54281   54846   55678   55693   56133   57403   58643   61043   61451
##   [64]   62502   62973   63380   64306   65781   66032   66769   67365   71727
##   [73]   74441   75272   77155   79492   81924   82551   83414   85736   86395
##   [82]   86662   87195   87885   88372   88623   89313   91650   91713   92905
##   [91]   94851   94961   99322  100216  100232  100702  101581  101973  102130
##  [100]  102350  104499  105629  106523  110963  111778  112641  113614  114430
##  [109]  115214  116124  116312  119120  119481  121081  121411  121709  122791
##  [118]  122948  128298  129553  130133  130416  130871  132283  132800  133491
##  [127]  134008  134353  134448  135656  136550  136958  139138  139264  141178
##  [136]  141288  142449  143264  143500  144880  145680  146700  147908  148143
##  [145]  149351  149665  150685  150983  151093  152222  152285  153587  155140
##  [154]  155391  155470  155956  157744  159376  159517  160866  162137  163220
##  [163]  163267  163800  167110  167126  167502  168052  168789  169965  170232
##  [172]  170420  171221  172899  173166  173182  173888  174311  174374  175095
##  [181]  176586  177731  177982  179833  180445  181465  184226  184540  184838
##  [190]  185356  185873  189262  191929  192509  192745  194204  194408  194439
##  [199]  194627  195270  195600  196949  197733  200024  200322  200761  202063
##  [208]  202942  203554  203569  204511  206315  206331  206409  206534  206613
##  [217]  206629  207868  208480  209154  210911  211476  211633  212323  212763
##  [226]  215273  216292  216732  217955  219132  219775  222254  223227  223399
##  [235]  223995  225078  227776  228357  228717  231902  231981  233298  233785
##  [244]  234397  235134  238821  240170  243292  244343  244751  247167  248281
##  [253]  249536  249865  251481  253160  254069  255387  256846  258603  258980
##  [262]  260909  262133  263153  264455  265757  266432  266996  267091  267342
##  [271]  268032  268377  269256  271060  271844  271954  272158  272754  272942
##  [280]  273680  273805  275264  276111  276158  278951  280865  286183  288913
##  [289]  289085  289603  289681  289948  290466  290497  290544  290638  291078
##  [298]  293180  293588  296098  297839  299079  299894  301667  302310  304538
##  [307]  307613  309135  310217  311174  312100  312555  315818  316147  317983
##  [316]  319850  319960  320226  324289  324682  325435  326596  327851  329404
##  [325]  329796  330518  332573  333969  334110  334675  335302  335334  337907
##  [334]  341468  341625  342048  342503  342519  343617  345845  345892  346221
##  [343]  346378  349375  351038  351069  351257  351618  352214  352230  353579
##  [352]  353814  354364  354662  356450  356701  357423  358693  359572  359996
##  [361]  361141  361878  362961  364498  365706  365784  367542  368263  368467
##  [370]  368953  369393  369518  371903  374303  374413  376750  379041  379292
##  [379]  379480  380625  381096  383967  384987  385128  385253  387199  387701
##  [388]  388532  390415  391764  392752  396392  396674  398494  399655  401632
##  [397]  401946  404911  406166  408111  411406  413116  413476  413492  414841
##  [406]  415610  415673  418889  419783  421744  422387  423470  427690  428474
##  [415]  428788  429384  429572  431878  431894  432381  432741  434404  434671
##  [424]  436052  436930  438640  440852  443174  443833  445543  445810  446061
##  [433]  446233  446578  446751  447174  447269  447833  450218  452399  452524
##  [442]  452728  453010  455709  456760  457670  458140  458940  461168  464243
##  [451]  468400  468651  468730  469216  472777  473530  474723  476480  476527
##  [460]  476558  476856  477060  478849  480229  480386  481186  482049  482065
##  [469]  483226  483492  483634  484481  485453  486159  487571  487634  487854
##  [478]  487885  488246  490034  490599  490991  493093  494725  496702  497486
##  [487]  498098  498255  498616  499133  502522  504138  505581  505769  507479
##  [496]  507668  507699  507887  508123  508531  509660  509723  510209  512719
##  [505]  512829  514021  515182  515324  516814  517771  519575  519669  520705
##  [514]  521238  521740  522415  524893  524940  526148  526227  527670  527858
##  [523]  528533  529553  530337  530619  531043  531812  532533  535169  535420
##  [532]  537256  537726  538903  541664  541978  547045  548394  549947  551642
##  [541]  555234  557603  557760  558011  558199  562796  563125  563847  563972
##  [550]  564067  565918  566420  566561  567330  567816  568036  568647  570106
##  [559]  571644  572240  572303  573730  574170  575393  576413  577213  579017
##  [568]  579692  580351  580665  582516  583661  585104  585214  585418  586202
##  [577]  586736  588524  589371  589418  590736  591034  591834  592211  595270
##  [586]  596259  597608  598973  601012  601781  602189  602863  603208  603804
##  [595]  604463  604605  606848  607303  609358  609640  610597  610801  611099
##  [604]  611507  611868  612339  612825  615571  621626  621893  624434  624528
##  [613]  624779  625360  625470  625815  630161  631117  631243  631353  633486
##  [622]  636389  637816  638695  640170  640264  640797  641111  642083  644876
##  [631]  646131  646523  646664  647119  647229  647370  647904  647935  649959
##  [640]  651167  654728  654885  655277  655309  655764  655779  659105  659152
##  [649]  660376  664110  664878  666573  666839  667075  667922  669349  669710
##  [658]  670683  670887  673256  677397  678966  679045  679327  679955  681523
##  [667]  681727  682778  686544  686842  687673  689116  690011  690183  692599
##  [676]  692740  692772  694356  694733  695345  697227  698388  698514  699486
##  [685]  699533  699957  700459  701055  703659  703816  707597  707911  709056
##  [694]  709652  711252  711754  711864  712100  714139  714861  715206  718485
##  [703]  719112  719316  723191  724446  724666  725356  726376  728274  728933
##  [712]  731741  734832  735004  735647  736479  736730  738691  740291  741405
##  [721]  742566  742691  743366  744637  745139  745154  747664  750175  750190
##  [730]  751900  754112  755603  755932  756434  757093  757642  758270  759070
##  [739]  759383  759493  759838  760435  761093  764859  765988  766271  767431
##  [748]  768498  769722  770553  770930  772279  776327  777378  777848  778256
##  [757]  779182  781645  786791  787983  789818  790116  792109  793489  794446
##  [766]  795325  796800  796894  797427  798714  800612  801114  801506  801537
##  [775]  802761  803295  803498  804189  806589  809836  809962  811515  814198
##  [784]  815578  816849  817006  817398  820740  821383  822920  822983  825838
##  [793]  825980  826089  826654  827517  828443  828490  833965  834498  835957
##  [802]  836585  837824  838201  839409  839487  840930  841119  843174  843597
##  [811]  843880  844303  844429  845009  845072  845292  845747  845794  846813
##  [820]  848429  848680  849229  849323  850531  850986  851363  851426  852163
##  [829]  854924  856054  856163  858140  861576  863019  863207  864227  864541
##  [838]  864902  865561  865969  867098  867161  868494  871020  871459  874252
##  [847]  875115  875742  877013  877107  877233  877327  879178  879821  881076
##  [856]  881296  881986  882378  882927  884904  885296  885563  886991  888042
##  [865]  888057  888763  889971  890473  891462  892858  893925  895321  896921
##  [874]  899416  899996  903996  904295  905095  906805  907620  908530  909079
##  [883]  909519  910476  912233  912609  914272  914900  915041  915198  915449
##  [892]  917818  917865  920563  921410  921489  922744  922901  923858  924062
##  [901]  924768  925128  925787  926085  926352  927199  927638  931608  932831
##  [910]  934008  934478  934651  934886  935153  937130  937789  938040  938275
##  [919]  938730  939954  942652  943232  943421  946778  946856  948174  949649
##  [928]  951077  953430  953524  953696  954057  955046  955344  958136  958168
##  [937]  959391  959783  959925  960380  961164  962042  963156  963219  966357
##  [946]  968537  973479  973636  977370  979331  979833  981872  982217  982610
##  [955]  982908  983253  984147  985120  985935  987630  988555  988681  992587
##  [964]  993215  999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
##  [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
##  [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
##  [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"

Plot the output

# Cluster map of the 5-cluster CLARA solution, points only, with "norm" ellipses.
fviz_cluster(
  clara_clust_5,
  geom = "point",
  ellipse.type = "norm"
)

# Same map with a custom five-colour palette, "t" ellipses and a classic theme.
fviz_cluster(
  clara_clust_5,
  geom = "point",
  pointsize = 1,
  ellipse.type = "t",
  palette = c("#00AFBB", "#FC4E07", "#E7B800", "#7E57C2", "#4DB6AC"),
  ggtheme = theme_classic()
)

# Silhouette plot; the call also prints size and average width per cluster.
fviz_silhouette(clara_clust_5)
##   cluster size ave.sil.width
## 1       1   37          0.59
## 2       2  325          0.59
## 3       3  201         -0.09
## 4       4  123          0.23
## 5       5  314          0.33

6 clusters and 10 samples specified

# CLARA with k = 6 medoids: 10 sub-samples of 1000 rows each, Euclidean
# distance, no extra standardisation inside clara() (stand = FALSE).
# NOTE(review): geo_z is presumably the z-scored Lat/Lon built earlier - confirm.
clara_clust_6 <- clara(
  x = geo_z,
  k = 6,
  metric = "euclidean",
  stand = FALSE,
  samples = 10,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_6)
## [1] "clara"     "partition"
clara_clust_6
## Call:     clara(x = geo_z, k = 6, metric = "euclidean", stand = FALSE, samples = 10, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE) 
## Medoids:
##             Lat          Lon
## [1,] -2.3077290  3.255075201
## [2,]  0.3938083 -0.190058975
## [3,]  0.9595881  0.187208083
## [4,] -1.3280237  0.008863656
## [5,] -0.2919854 -0.402700407
## [6,]  0.7562992  1.823175233
## Objective function:   0.4406954
## Clustering vector:    int [1:1028136] 1 2 2 2 3 4 1 5 1 2 1 3 4 1 5 5 3 5 ...
## Cluster sizes:            30283 313464 153436 118516 365182 47255 
## Best sample:
##    [1]     157     393    1240    2667    3028    3279    4001    5272    6574
##   [10]    7719    8456    9539   10715   12284   12363   14841   15045   16097
##   [19]   18481   20881   20991   23329   24050   25619   26058   26090   27204
##   [28]   27674   30545   31565   31706   31832   33777   34373   36977   36993
##   [37]   38342   42970   45072   47457   48524   52634   52744   56509   57984
##   [46]   59694   60055   61419   62188   62251   65467   66361   68322   68965
##   [55]   70048   73609   75052   75366   76151   76684   78457   78472   79320
##   [64]   80983   83508   85218   87430   88921   89752   90411   92388   92639
##   [73]   92811   93156   93329   93753   94412   96796   98977   99102   99306
##   [82]   99589  100750  101816  102287  103338  104248  104719  105519  105597
##   [91]  109645  110821  114979  115308  115794  120109  121301  123058  123136
##  [100]  123435  125427  126807  126964  127764  128643  130118  130212  131059
##  [109]  132032  134150  134432  134824  136079  136613  137177  139672  139907
##  [118]  143155  143280  144676  144833  145194  145712  147516  149100  150716
##  [127]  152159  152348  154058  154466  154701  155109  156238  156301  156787
##  [136]  159298  159407  159972  160835  161761  163392  166153  167283  167816
##  [145]  168993  169275  169903  171142  171472  171519  172727  172805  174248
##  [154]  174437  175111  176915  177198  177621  178327  178390  179065  179112
##  [163]  180131  181747  181998  182547  183834  183849  184304  184681  185481
##  [172]  188242  188556  189481  193623  194972  196525  197545  197859  198220
##  [181]  199287  201812  204338  204777  207570  208433  209060  209672  210425
##  [190]  210551  210645  212496  213139  214394  214614  215226  215304  218222
##  [199]  218881  220309  220748  221972  223791  224780  225596  226270  226929
##  [208]  227243  228639  229094  230239  231683  231792  231996  232734  233314
##  [217]  235103  235997  237315  237613  238413  238789  240123  241848  242837
##  [226]  244186  245551  247590  248218  248359  248767  249441  249787  250383
##  [235]  251042  251183  253881  254807  255936  256219  257176  257380  258086
##  [244]  258446  258917  259403  259670  264926  266149  267797  268204  268471
##  [253]  270448  271013  271107  271358  271593  272048  272393  273272  276739
##  [262]  277696  277821  277931  280065  282967  284395  285273  286748  286842
##  [271]  287375  288662  291454  291486  292505  292709  293101  293243  293698
##  [280]  293807  293949  294482  294513  296537  301463  301855  302357  305683
##  [289]  306797  306954  310688  313151  315190  315928  316288  316571  317465
##  [298]  318438  319834  321999  323976  325544  325623  325905  326533  328306
##  [307]  329357  333122  333420  334251  335695  336589  336761  338126  339177
##  [316]  339319  339350  340935  341311  341374  341923  344966  346065  346112
##  [325]  346535  347037  347633  348088  350238  350394  354175  354489  355634
##  [334]  356230  357831  358442  358678  359258  360466  360717  361439  365063
##  [343]  365690  365894  369722  369769  371024  371244  371934  372483  372876
##  [352]  374852  375511  378319  381410  383057  383308  383496  385269  386869
##  [361]  387983  389144  389270  389944  391215  391717  394243  396753  396768
##  [370]  398479  400408  400424  400691  402181  402510  402557  403671  404220
##  [379]  404848  405648  405962  407672  407766  411437  412566  412692  412849
##  [388]  414010  415077  416300  417132  417508  417587  418857  422905  423956
##  [397]  424427  424835  425760  427486  428223  433369  434561  435895  436397
##  [406]  436726  438687  440068  440946  441025  441903  443378  443472  444005
##  [415]  445292  447190  447849  448084  448116  449339  449826  449873  450077
##  [424]  450767  453167  456305  456415  456540  460776  461686  462157  463427
##  [433]  463584  467318  472417  472558  472668  473232  474095  475021  475068
##  [442]  475884  477578  480543  480574  481076  482535  482865  483163  484245
##  [451]  484402  486065  487509  489752  490176  491007  491587  491650  491870
##  [460]  492325  493392  493737  495007  495462  495808  495902  497110  497941
##  [469]  498004  498741  501502  502632  502742  504718  508154  509362  509597
##  [478]  509786  510805  511119  512139  512547  513676  513739  515041  515073
##  [487]  516845  518038  519199  520830  520971  521693  522320  523591  523685
##  [496]  523905  524721  525254  525756  526399  527654  528564  528957  529506
##  [505]  531420  531482  531686  531875  533569  534353  534620  534636  535342
##  [514]  536550  538040  539185  539436  540503  541899  542919  543499  545680
##  [523]  545994  546574  550873  551673  553383  554199  555658  555862  555893
##  [532]  557054  558811  559188  560850  561478  561619  561776  562027  562215
##  [541]  563517  564396  564443  565965  567141  567785  567863  567989  568067
##  [550]  568083  569322  569934  570436  570640  571346  571707  572366  572664
##  [559]  572930  573777  574217  578186  579409  580586  581057  581229  581465
##  [568]  581731  583708  584367  584681  584853  586532  589230  589811  589999
##  [577]  593356  593435  594752  595851  596227  597655  600008  600102  600275
##  [586]  600635  601624  604746  605797  605969  606362  607742  608621  609735
##  [595]  609797  610990  611319  612935  614614  615116  615524  620057  620214
##  [604]  620434  624607  625909  626411  628451  628796  629486  629831  630725
##  [613]  631698  632514  634208  634396  635134  635259  637565  639165  639793
##  [622]  646382  646680  647637  648955  650022  650367  650539  651136  651387
##  [631]  651920  651951  652092  652438  652532  654571  654634  655042  655183
##  [640]  659325  659372  659795  661348  663121  663764  665992  667435  667749
##  [649]  668894  669067  670589  671091  671671  671703  671938  673554  673726
##  [658]  674699  677272  677601  678323  678951  681304  681414  682982  684284
##  [667]  685194  685743  686136  688050  688112  689305  690858  691250  691579
##  [676]  691972  694027  694670  696757  696788  698529  699361  701243  702404
##  [685]  702530  702922  703502  703957  704475  705071  707675  707832  710013
##  [694]  710829  712492  712523  712711  713668  713684  715269  715771  715818
##  [703]  716116  718108  718155  718877  719222  720148  721026  722595  723332
##  [712]  724415  724697  725952  728996  729560  729717  730392  730408  730847
##  [721]  733357  735757  737216  737687  739020  740495  740746  741483  742079
##  [730]  745421  746441  746707  749155  749986  751869  753218  754206  757266
##  [739]  758128  759948  760450  761109  761376  763086  763337  763400  764027
##  [748]  766365  766522  767620  769565  774570  774930  774946  776295  776687
##  [757]  777064  780343  781237  783198  785677  788328  789144  789928  790242
##  [766]  790838  791026  793348  793835  794195  796125  797506  803012  804267
##  [775]  804628  805130  806997  807515  807687  808032  808205  808723  809162
##  [784]  811264  811672  813853  813978  815892  817163  817979  818214  819594
##  [793]  820395  821414  822622  822858  825399  825697  826936  826999  828301
##  [802]  829854  830105  830184  830670  832459  834231  836851  837934  837981
##  [811]  838514  841840  842217  842766  843503  844680  844946  845135  845935
##  [820]  847613  847880  847896  848602  849025  849088  849308  849810  852053
##  [829]  852445  852696  854548  856179  858940  859552  859709  860070  860587
##  [838]  863976  865592  867035  867224  867459  868918  869122  869153  869341
##  [847]  869577  869985  870314  871114  871177  871663  872448  875036  875475
##  [856]  876778  877656  878268  879225  881029  881045  881123  881249  881343
##  [865]  882582  883194  883869  885626  886347  886394  887038  887477  887602
##  [874]  889312  889987  891007  892074  892497  893987  894489  896874  897941
##  [883]  898710  899180  902490  903071  903432  906616  906695  908013  908499
##  [892]  909111  909848  913096  913535  914884  919057  919214  919465  921881
##  [901]  922995  924250  924579  925426  926195  927874  928784  929490  930101
##  [910]  931560  933694  933757  935624  936847  937867  939169  940471  941146
##  [919]  941805  942056  942746  943970  945774  946558  946668  946872  947468
##  [928]  947656  948394  949978  950825  950873  952190  953665  956724  959062
##  [937]  963627  963800  964317  964396  964662  965180  965259  965353  965917
##  [946]  968302  970812  971095  972554  973793  977025  979252  982327  983849
##  [955]  984931  985888  986234  986814  986924  987269  990862  992572  992697
##  [964]  992807  994564  994674  994940 1000149 1001310 1002565 1003538 1004118
##  [973] 1004510 1005232 1006330 1007287 1008118 1008573 1008683 1008824 1009389
##  [982] 1010048 1010801 1012621 1016182 1016339 1016763 1017218 1017233 1018331
##  [991] 1020559 1020606 1020936 1021093 1024089 1025564 1025752 1025783 1025972
## [1000] 1026332
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"

Plot the output

# Cluster map of the 6-cluster CLARA solution, points only, with "norm" ellipses.
fviz_cluster(
  clara_clust_6,
  geom = "point",
  ellipse.type = "norm"
)

# Same map with a custom six-colour palette, "t" ellipses and a classic theme.
fviz_cluster(
  clara_clust_6,
  geom = "point",
  pointsize = 1,
  ellipse.type = "t",
  palette = c(
    "#00AFBB", "#FC4E07", "#E7B800",
    "#7E57C2", "#4DB6AC", "#F06292"
  ),
  ggtheme = theme_classic()
)

# Silhouette plot; the call also prints size and average width per cluster.
fviz_silhouette(clara_clust_6)
##   cluster size ave.sil.width
## 1       1   34          0.68
## 2       2  281          0.50
## 3       3  159          0.23
## 4       4  132          0.28
## 5       5  350          0.30
## 6       6   44          0.44

The solution with three clusters attains the highest average silhouette score, so we continue the evaluation with three clusters.

# Medoid coordinates of the 3-cluster solution, in the units of geo_z.
clara_clust_3[["medoids"]]
##             Lat         Lon
## [1,] -1.3451686  0.20092689
## [2,] -0.2087105 -0.40955981
## [3,]  0.6191405  0.01229336
# Row indices of the observations that serve as medoids.
clara_clust_3[["i.med"]]
## [1] 585418 443174  77155
# Enrich the trip table: weekday name, the geo_z columns and the cluster label.
uber_data[["Weekday"]] <- weekdays(uber_data[["Date"]])
uber_data <- cbind(uber_data, geo_z, clara_clust_3$clustering)
# Columns are renamed by position, so the order below must match the cbind().
names(uber_data) <- c(
  "Date.Time", "Lat", "Lon", "Base", "Date", "Time", "Weekday",
  "Lat_z", "Lon_z", "cluster"
)
uber_data$cluster <- as.factor(uber_data$cluster)

# The medoid trips themselves, without the combined Date.Time column.
medoids <- uber_data[clara_clust_3$i.med, -1]
medoids
##            Lat      Lon   Base       Date     Time  Weekday      Lat_z
## 585418 40.6843 -73.9601 B02617 2014-09-25 10:34:00 Thursday -1.3451686
## 443174 40.7307 -73.9957 B02617 2014-09-13 18:30:00 Saturday -0.2087105
## 77155  40.7645 -73.9711 B02598 2014-09-06 14:23:00 Saturday  0.6191405
##              Lon_z cluster
## 585418  0.20092689       1
## 443174 -0.40955981       2
## 77155   0.01229336       3

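One medoid was registered on a weekday morning (Thursday, 10:34) and the other two on a Saturday, in the afternoon (14:23) and in the evening (18:30). The weekday-morning medoid may look surprising, because a weekday morning isn’t the most popular time for taxi rides; note, however, that the clustering uses only the coordinates, so the timestamp of a medoid is incidental. The medoids fall on three different days of September; two of them share the weekday (Saturday) and two share the base (B02617).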

Plot the points on the map.

Data frame with coordinates: identify the places under those longitude and latitude.

# Coordinates of the medoid trips, one row per cluster.
locations <- data.frame(lat = medoids$Lat, lon = medoids$Lon)
# Reverse-geocode every (lon, lat) pair with ggmap::revgeocode(); the Google
# service behind it answers only when billing is enabled for the project.
locations$address <- apply(
  locations,
  1,
  function(row) {
    revgeocode(row[c("lon", "lat")])
  }
)
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
print(locations)
##       lat      lon address
## 1 40.6843 -73.9601    <NA>
## 2 40.7307 -73.9957    <NA>
## 3 40.7645 -73.9711    <NA>

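The places at these coordinates are:
- for cluster 1: central Brooklyn (around Clinton Hill / Bedford–Stuyvesant); this southern cluster also appears to extend toward John F. Kennedy International Airport;
- for cluster 2: Greenwich Village in Lower Manhattan, near New York University and Washington Square Park;
- for cluster 3: Midtown Manhattan, near the south-east corner of Central Park.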

These destinations are quite reasonable, especially the places near the park and the airport.

Analysis of the clusters by time and date

# Number of pickups assigned to each cluster.
uber_data %>%
  group_by(cluster) %>%
  tally(name = "counts")
## # A tibble: 3 × 2
##   cluster counts
##   <fct>    <int>
## 1 1       155513
## 2 2       446220
## 3 3       426403
# Daily number of pickups per cluster, drawn as one line per cluster.
uber_data %>%
  group_by(cluster, Date) %>%
  # Drop the grouping explicitly so the plot receives a plain, ungrouped table
  # and summarize() does not emit its "grouped output" message.
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Date, color = cluster)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  theme_minimal() +
  labs(x = "", y = "", title = "Number of Uber assigned clusters by Date - Sep 2014")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

We can observe that clusters 2 and 3 follow much the same trend of pickups, with cluster 2 having slightly more pickups than cluster 3 (446 220 vs 426 403). The first cluster has the smallest number of pickups (155 513). It seems that destinations near the parks (clusters 2 and 3) are much more popular than those toward the airport (cluster 1). Additionally, cluster 1 is stable over time compared with clusters 2 and 3.

# Number of pickups per cluster for each day of the week (dodged bars).
uber_data %>%
  mutate(
    # Weekday is a character column, so ggplot would sort it alphabetically.
    # Order the labels Monday..Sunday using the ISO weekday number ("%u"),
    # which does not depend on the locale of the weekday names.
    Weekday = factor(
      Weekday,
      levels = unique(Weekday[order(as.integer(format(Date, "%u")))])
    )
  ) %>%
  group_by(cluster, Weekday) %>%
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Weekday, fill = cluster)) +
  geom_col(position = position_dodge()) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 12)) +
  labs(x = "", y = "", title = "Number of Uber pickups by Weekday - Sep 2014")

Cluster 3 is popular on Friday and Saturday. We can guess that this area is popular for evening activities (we can check this hypothesis later). For the second cluster we can observe peaks on Tuesday and Thursday and the lowest values during the weekend. Again, the first cluster is quite stable throughout the week.

# Pickups per cluster at three reference times of day (00:00, 10:00, 20:00).
# The filtered rows go into their own object: assigning them back to
# `uber_data` would leave only these few rows for every later step
# (e.g. the forecasting section that subsets `uber_data` by date).
uber_by_time <- uber_data %>%
  mutate(Time = as.POSIXct(Time, format = "%H:%M")) %>%
  filter(format(Time, "%H:%M") %in% c("00:00", "10:00", "20:00"))
uber_by_time %>%
  group_by(cluster, Time) %>%
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Time, color = cluster)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 0.9, alpha = 0.7) +
  theme_minimal() +
  labs(x = "Time", y = "Number of Pickups", title = "Number of Uber Pickups by Cluster and Time - Sep 2014") +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "10 hours")

All three clusters show an upward trend toward the evening, with orders starting to rise from the first half of the day—most noticeably in clusters 2 and 3.

Forecasting

Select an arbitrarily chosen day (‘2014-09-14’) and split it into training and test sets in a 95% : 5% ratio.

# Fix the RNG so the random train/test split is reproducible.
set.seed(111)
# Trips of the selected day; columns 2:4 and 6 are Lat, Lon, Base and Time.
uber_fr <- uber_data[uber_data$Date == as.Date("2014-09-14"), c(2:4, 6)]
# Fail early with a clear message instead of an obscure error further down.
stopifnot("no trips found for 2014-09-14" = nrow(uber_fr) > 0)
# Standardise Lat and Lon within the selected day.
uber_z_fr <- as.data.frame(lapply(uber_fr[, 1:2], scale))

# Logical mask: TRUE marks the ~95% training rows, FALSE the ~5% test rows.
split <- sample.split(seq_len(nrow(uber_z_fr)), SplitRatio = 0.95)
training_set <- subset(uber_z_fr, split == TRUE)
test_set <- subset(uber_z_fr, split == FALSE)
uber_fr <- cbind(uber_fr, uber_z_fr, split)
colnames(uber_fr) <- c("Lat", "Lon", "Base", "Time", "Lat_z", "Lon_z", "split")
# k-means (k = 3) fitted on the standardised coordinates of the training rows.
pred_kcca <- kcca(uber_fr[split == TRUE, 5:6], k = 3, kccaFamily("kmeans"))
pred_kcca
## kcca object of family 'kmeans' 
## 
## call:
## kcca(x = uber_fr[split == TRUE, 5:6], k = 3, family = kccaFamily("kmeans"))
## 
## cluster sizes:
## 
##  1  2  3 
## 14  3 57
# Cluster labels for the training rows, then for the held-out rows.
pred_train <- predict(pred_kcca)
pred_test <- predict(pred_kcca, newdata = uber_fr[!split, 5:6])
# Draw the fitted partition first, then overlay training points (small dots)
# and test points (grey-filled squares), coloured by predicted cluster.
image(pred_kcca)
points(uber_fr[split, 5:6], col = pred_train, pch = 19, cex = 0.3)
points(uber_fr[!split, 5:6], col = pred_test, pch = 22, bg = "grey")

Resources

  1. Unsupervised Learning course materials by Jacek Lewkowicz. University of Warsaw, Faculty of Economic Sciences.