Uber trip data:
- Load the data [uber-data.csv]
- Discover and comment on clusters of Uber data based on locations (longitude & latitude)
- Analyze the cluster centers by time
- Analyze the cluster centers by date
- Remember to choose the right algorithm, compute the optimal number of clusters and quality measures
- Develop adequate plots
- Apply the data set for forecasting
library(DataSum)
library(caTools)
library(tidyverse)
library(factoextra)
library(stats)
library(flexclust)
library(mclust)
library(fpc)
library(clustertend)
library(cluster)
library(ClusterR)
library(hms)
library(ggmap)
library(tidyr)
library(ggmap)
library(ggplot2)
Exclude the found number of rows and upload the data set.
# Switch to the folder that holds the data file and read the raw trip records.
# NOTE(review): the absolute path is specific to one machine; the script fails
# elsewhere until the path is adjusted.
setwd(dir = "C:/Users/ydmar/Documents/UW/UW - 1 semester/UL")
uber_data <- read_csv(file = "uber-data.csv")
# Show the column specification readr used for the import.
spec(uber_data)
## cols(
## `Date/Time` = col_character(),
## Lat = col_double(),
## Lon = col_double(),
## Base = col_character()
## )
# Named character vector with the class of every column.
uber_data %>%
  lapply(class) %>%
  unlist()
## Date/Time Lat Lon Base
## "character" "numeric" "numeric" "character"
Check NA values in the data set.
# Collect every row with a missing value in any of the four source columns.
na_rows <- uber_data[
  !complete.cases(uber_data[, c("Date/Time", "Lat", "Lon", "Base")]),
]
# Report the incomplete rows, or confirm that there are none.
if (nrow(na_rows) == 0) {
  print("There are no NA values")
} else {
  print(na_rows)
}
## [1] "There are no NA values"
Since the data set has no NA values, we can continue the analysis with the complete data.
Calculate the mode of latitude and longitude values
# Mode of the pickup latitudes, computed with getmode().
Lat_mode <- getmode(uber_data[["Lat"]])
print(Lat_mode)
## [1] 40.774
# Mode of the pickup longitudes.
Lon_mode <- getmode(uber_data[["Lon"]])
print(Lon_mode)
## [1] -73.9888
Split date and time for further analysis
# Derive separate Date and Time columns from the raw "Date/Time" string and
# convert "Date/Time" itself to a date-time.
# A working copy (`dt`) is split so that the original column is kept; the new
# Date and Time columns therefore end up after the existing ones.
uber_data$dt <- uber_data$`Date/Time`
uber_data <- separate(
  data = uber_data,
  col = dt,
  into = c("Date", "Time"),
  sep = "\\s"
)
# The time zone is given explicitly so the parsed values do not depend on the
# time-zone setting of the machine running the script; the recorded wall-clock
# times are kept as written in the file.
uber_data$`Date/Time` <- as.POSIXct(
  uber_data$`Date/Time`,
  format = "%m/%d/%Y %H:%M:%S",
  tz = "UTC"
)
uber_data$Date <- as.Date(uber_data$Date, format = "%m/%d/%Y")
uber_data$Time <- as_hms(uber_data$Time)
Another look at the data set’s structure
# Compact overview of the column types after the conversions.
utils::str(uber_data)
## tibble [1,028,136 × 6] (S3: tbl_df/tbl/data.frame)
## $ Date/Time: POSIXct[1:1028136], format: "2014-09-01 00:01:00" "2014-09-01 00:01:00" ...
## $ Lat : num [1:1028136] 40.2 40.8 40.8 40.7 40.8 ...
## $ Lon : num [1:1028136] -74 -74 -74 -74 -73.9 ...
## $ Base : chr [1:1028136] "B02512" "B02512" "B02512" "B02512" ...
## $ Date : Date[1:1028136], format: "2014-09-01" "2014-09-01" ...
## $ Time : 'hms' num [1:1028136] 00:01:00 00:01:00 00:03:00 00:06:00 ...
## ..- attr(*, "units")= chr "secs"
# Preview the first five records.
head(uber_data, n = 5)
## # A tibble: 5 × 6
## `Date/Time` Lat Lon Base Date Time
## <dttm> <dbl> <dbl> <chr> <date> <time>
## 1 2014-09-01 00:01:00 40.2 -74.0 B02512 2014-09-01 01'00"
## 2 2014-09-01 00:01:00 40.8 -74.0 B02512 2014-09-01 01'00"
## 3 2014-09-01 00:03:00 40.8 -74.0 B02512 2014-09-01 03'00"
## 4 2014-09-01 00:06:00 40.7 -74.0 B02512 2014-09-01 06'00"
## 5 2014-09-01 00:11:00 40.8 -73.9 B02512 2014-09-01 11'00"
We apply the CLARA clustering method because the data set is quite large (over 1 million rows).
The clusters are going to be created by geographical variables (longitude and latitude).
Plot the data set:
# Scatter plot of every pickup location (latitude on x, longitude on y).
# The opacity is a fixed setting, so it is passed to the point layer rather
# than to aes(): a constant placed inside aes() is treated as a data value and
# goes through the alpha scale instead of being used as the literal opacity.
# With no alpha mapping there is no legend to suppress.
ggplot(uber_data, aes(x = Lat, y = Lon)) +
  geom_point(alpha = 0.05) +
  theme_minimal() +
  ggtitle("All Uber pickups - Sep") +
  xlab("Latitude") +
  ylab("Longitude")
We can assume that there are 3 clusters plus outliers on the graph. That is why it is reasonable to apply the CLARA method: CLARA draws multiple samples from the data set, applies PAM to each sample, and returns the best clustering as the output. We are going to run CLARA for several numbers of clusters (from 3 up to 5-7). For the clustering computations, set the number of samples = 6 and the sample size = 1000. Let's use the Euclidean distance.
z-score standardization
# Keep only the two coordinate columns used for clustering.
geo <- uber_data[c("Lat", "Lon")]
utils::str(geo)
## tibble [1,028,136 × 2] (S3: tbl_df/tbl/data.frame)
## $ Lat: num [1:1028136] 40.2 40.8 40.8 40.7 40.8 ...
## $ Lon: num [1:1028136] -74 -74 -74 -74 -73.9 ...
# Centre and scale both coordinates (z-scores) and store them as a data frame.
geo_z <- as.data.frame(scale(geo))
Start with 3 clusters and 6 samples specified
# CLARA partition of the standardised coordinates into 3 clusters,
# using 6 sub-samples of 1000 rows each and Euclidean distances.
clara_clust_3 <- clara(
  x = geo_z,
  k = 3,
  metric = "euclidean",
  stand = FALSE, # geo_z was already standardised with scale()
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_3)
## [1] "clara" "partition"
# Print the medoids, objective value, cluster sizes and the best sample.
print(clara_clust_3)
## Call: clara(x = geo_z, k = 3, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE)
## Medoids:
## Lat Lon
## [1,] -1.3451686 0.20092689
## [2,] -0.2087105 -0.40955981
## [3,] 0.6191405 0.01229336
## Objective function: 0.6377632
## Clustering vector: int [1:1028136] 1 2 3 2 3 1 3 2 1 3 1 3 1 1 2 2 3 2 ...
## Cluster sizes: 155513 446220 426403
## Best sample:
## [1] 2558 2887 3609 4236 6590 6699 8268 9570 10480
## [10] 11029 11421 13335 13398 16144 16536 16865 17257 19313
## [19] 19956 21603 22042 22074 23815 25415 26529 27690 27816
## [28] 28490 29761 30357 32961 33118 35299 36115 37777 37809
## [37] 38954 38970 40727 41056 41103 42766 43394 43441 44508
## [46] 45433 46312 47881 48618 49700 49983 51238 52556 53622
## [55] 54281 54846 55678 55693 56133 57403 58643 61043 61451
## [64] 62502 62973 63380 64306 65781 66032 66769 67365 71727
## [73] 74441 75272 77155 79492 81924 82551 83414 85736 86395
## [82] 86662 87885 88372 88623 89313 91650 91713 92905 94851
## [91] 94961 99322 100216 100232 100702 101581 101973 102130 102350
## [100] 105629 106523 110963 111778 112641 113614 114430 115214 116124
## [109] 116312 119120 119481 121081 121411 121709 122791 122948 128298
## [118] 129553 130133 130416 130871 131938 132283 132800 133491 134008
## [127] 134353 134448 135656 136550 136958 139138 139264 141178 141288
## [136] 142449 143264 143500 144880 145680 146700 147908 148143 149351
## [145] 149665 150685 150983 151093 152222 152285 153587 155140 155391
## [154] 155470 155956 157744 159376 159517 160866 162137 163220 163267
## [163] 163800 167110 167126 167502 168052 168789 169965 170232 170420
## [172] 171221 172899 173166 173182 173888 174311 174374 175095 176586
## [181] 177731 177982 179833 180445 181465 184226 184540 184838 185356
## [190] 185873 189262 191929 192509 192745 194204 194408 194439 194627
## [199] 195270 195600 196949 197733 200024 200322 200761 202063 202942
## [208] 203554 204511 206315 206331 206409 206534 206613 206629 207868
## [217] 208480 209154 210911 211476 211633 212323 212763 215273 216292
## [226] 216732 217955 219132 219775 222254 223227 223399 223995 225078
## [235] 227776 228357 228717 231902 231981 233298 233785 234397 235134
## [244] 238821 240170 243292 244343 244751 247167 248281 249536 249865
## [253] 251481 253160 254069 255387 256846 258603 258980 260909 262133
## [262] 263153 264455 265757 266432 266996 267091 267342 268032 268377
## [271] 269256 271060 271844 272158 272754 272942 273680 273805 275264
## [280] 276111 276158 278951 286183 288913 289085 289603 289681 289948
## [289] 290466 290497 290544 290638 291078 293180 293588 296098 297839
## [298] 299079 299894 301667 302310 304538 307613 309135 310217 311174
## [307] 312100 312272 312555 315818 316147 317983 319850 319960 320226
## [316] 324289 324682 325435 326596 327851 329404 329796 330518 332573
## [325] 333969 334110 334675 335302 335334 337907 341468 341625 342048
## [334] 342503 342519 343617 345845 345892 346221 346378 349375 351038
## [343] 351069 351257 351618 352214 352230 353579 353814 354364 354662
## [352] 356450 356701 357423 358693 359572 359996 361141 361878 362961
## [361] 364498 365706 365784 367542 368263 368467 368953 369393 369518
## [370] 371903 374303 374413 376750 379041 379292 379480 380625 381096
## [379] 383967 384987 385128 385253 387199 387701 388532 390415 391764
## [388] 392752 396392 396674 398494 399655 401632 401946 404911 406166
## [397] 408111 411406 413116 413476 413492 414841 415610 415673 418889
## [406] 419783 421744 422387 423470 427690 428474 428788 429384 429572
## [415] 431878 431894 432381 432741 434404 434671 436052 436930 438640
## [424] 440852 443174 443833 445543 445810 446061 446233 446578 446751
## [433] 447174 447269 447833 450218 452399 452524 452728 453010 455709
## [442] 456760 457670 458140 458940 461168 464243 466847 468400 468651
## [451] 468730 469216 472777 473530 474723 476480 476527 476558 476856
## [460] 477060 478849 480229 480386 482049 482065 483226 483492 483634
## [469] 484481 485453 486159 487571 487634 487854 488246 490034 490599
## [478] 490991 493093 494725 496702 497486 498098 498255 498616 499133
## [487] 502522 504138 505581 505769 507479 507668 507699 507887 508123
## [496] 508531 509660 509723 510209 512719 512829 514021 515182 515324
## [505] 516814 517771 519575 519669 519889 520705 521238 521740 522415
## [514] 524893 524940 526148 526227 527670 527858 528533 529553 530337
## [523] 530619 531043 531812 532533 535169 535420 537256 537726 538903
## [532] 541664 541978 547045 548394 549947 551642 555234 557603 557760
## [541] 558011 558199 562796 563125 563847 563972 564067 565918 566420
## [550] 566561 567330 567816 568036 568647 570106 571644 572240 572303
## [559] 573730 574170 575393 576413 577213 579017 579692 580351 580665
## [568] 582516 583661 585104 585214 585418 586202 586736 588524 589371
## [577] 589418 590736 591034 591834 592101 592211 595270 596259 597608
## [586] 598973 601012 601781 602189 602863 603208 603804 604463 604605
## [595] 606848 607303 609358 609640 610597 610801 611099 611507 611868
## [604] 612339 612825 615571 619398 621626 621893 624434 624528 624779
## [613] 625360 625470 625815 630161 631117 631243 631353 633486 636389
## [622] 637816 638695 640170 640264 640797 641111 642083 644876 646131
## [631] 646523 646664 647119 647229 647370 647904 647935 649959 651167
## [640] 654728 654885 655277 655309 655764 655779 659105 659152 660376
## [649] 663153 664110 664878 666573 666839 667075 667922 669349 669710
## [658] 670683 670887 673256 677397 678966 679045 679327 679955 681523
## [667] 681727 682778 686544 686842 687673 689116 690011 690183 692599
## [676] 692740 692772 694356 694733 695345 697227 698388 698514 699486
## [685] 699533 699957 700459 701055 703659 703816 705024 707597 707911
## [694] 709056 709652 711252 711754 711864 712100 714139 714861 715206
## [703] 718485 719112 719316 723191 724446 724666 725356 726376 728274
## [712] 728933 731741 734832 735004 735647 736479 736730 738691 740291
## [721] 741405 742566 742691 743366 745139 745154 747664 750175 750190
## [730] 751900 754112 755603 755932 756434 757093 757642 758270 759070
## [739] 759383 759493 759838 760435 761093 764859 765988 766271 767431
## [748] 768498 769722 770553 770930 772279 776327 777378 777848 778256
## [757] 779182 781645 786791 787983 789818 790116 792109 793489 794446
## [766] 795325 796800 796894 797427 798714 800612 801114 801506 801537
## [775] 802761 803295 803498 804189 806589 809836 809962 811515 814198
## [784] 815578 816849 817006 817398 820740 821383 822920 822983 825838
## [793] 825980 826089 826654 827517 828443 828490 833965 834498 835957
## [802] 836585 837824 838201 839409 839487 840930 841119 843174 843597
## [811] 843880 844303 844429 845009 845072 845292 845747 845794 846813
## [820] 848429 848680 849229 849323 850531 850986 851363 851426 852163
## [829] 854924 856054 856163 858140 861576 863019 863207 864227 864541
## [838] 864902 865561 865969 867098 867161 868494 871020 871459 874252
## [847] 875115 875742 877013 877107 877233 877327 879178 879821 881076
## [856] 881296 881986 882378 882927 884904 885296 885563 886991 888057
## [865] 888763 889971 890473 891462 892858 893925 895321 896921 899416
## [874] 899996 903996 904295 905095 906805 907620 908530 909079 909519
## [883] 910476 912233 912609 914272 914900 915041 915198 915449 917818
## [892] 917865 920563 921410 921489 922744 922901 923858 924062 924768
## [901] 925128 925787 926085 926352 927199 927638 931608 932831 934008
## [910] 934478 934651 934886 935153 937130 937789 938040 938275 938730
## [919] 939954 942652 943232 943421 944378 946778 946856 948174 949649
## [928] 951077 953430 953524 953696 954057 955046 955344 958136 958168
## [937] 959391 959783 959925 960380 961164 962042 963156 963219 966357
## [946] 968537 973479 973636 977370 979331 979833 981872 982217 982610
## [955] 982908 983253 984147 985120 985935 987630 988555 988681 992587
## [964] 993215 999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
## [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
## [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
## [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
Plot the output
# Cluster scatter plot with "norm" ellipses around each cluster.
fviz_cluster(
  clara_clust_3,
  geom = "point",
  ellipse.type = "norm"
)
# Same partition with a custom palette, "t" ellipses and a classic theme.
fviz_cluster(
  clara_clust_3,
  palette = c("#00AFBB", "#FC4E07", "#E7B800"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)
# Silhouette plot and per-cluster average silhouette widths.
fviz_silhouette(clara_clust_3)
## cluster size ave.sil.width
## 1 1 159 0.01
## 2 2 387 0.56
## 3 3 454 0.27
Continue using this approach since it allows us to have more granular control over the clustering process.
4 clusters and 6 samples specified
# CLARA partition of the standardised coordinates into 4 clusters,
# using 6 sub-samples of 1000 rows each and Euclidean distances.
clara_clust_4 <- clara(
  x = geo_z,
  k = 4,
  metric = "euclidean",
  stand = FALSE, # geo_z was already standardised with scale()
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_4)
## [1] "clara" "partition"
# Print the medoids, objective value, cluster sizes and the best sample.
print(clara_clust_4)
## Call: clara(x = geo_z, k = 4, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE)
## Medoids:
## Lat Lon
## [1,] -2.3101783 3.25507520
## [2,] -0.1572759 -0.41127466
## [3,] 0.6289375 0.01572306
## [4,] -1.2545458 0.07231312
## Objective function: 0.550237
## Clustering vector: int [1:1028136] 1 2 3 2 3 4 1 2 1 3 1 3 4 1 2 2 3 2 ...
## Cluster sizes: 35174 450112 411728 131122
## Best sample:
## [1] 2558 2887 3609 4236 6590 6699 8268 9570 9868
## [10] 10480 11029 11421 13335 13398 16144 16536 16865 17257
## [19] 19313 19956 21603 22042 22074 23815 25415 26529 27690
## [28] 27816 28490 29761 30357 32961 33118 35299 36115 37777
## [37] 37809 38954 38970 40727 41056 41103 42766 43394 43441
## [46] 44508 45433 46312 47881 48618 49700 49983 51238 52556
## [55] 53622 54281 54846 55678 55693 56133 57403 58643 61043
## [64] 61451 62502 62973 63380 64306 65781 66032 66769 67365
## [73] 71727 74441 75272 77155 79492 81924 82551 83414 85736
## [82] 86395 86662 87885 88372 88623 89313 91650 91713 92905
## [91] 94851 94961 99322 100216 100232 100702 101581 101973 102130
## [100] 102350 105629 106523 110963 111778 112641 113614 114430 115214
## [109] 116124 116312 119120 119481 121081 121411 121709 122791 122948
## [118] 128298 129553 130133 130416 130871 132283 132800 133491 134008
## [127] 134353 134448 135656 136550 136958 139138 139264 141178 141288
## [136] 142449 143264 143500 144880 145680 146700 147908 148143 149351
## [145] 149665 150685 150983 151093 152222 152285 153587 155140 155391
## [154] 155470 155956 157744 159376 159517 160866 162137 163220 163267
## [163] 163800 167110 167126 167502 168052 168789 169965 170232 170420
## [172] 171221 172899 173166 173182 173888 174311 174374 175095 176586
## [181] 177731 177982 179833 180445 181465 184226 184540 184838 185356
## [190] 185873 189262 191929 192509 192745 194204 194408 194439 194627
## [199] 195270 195600 196949 197733 200024 200322 200761 202063 202942
## [208] 203554 204511 206315 206331 206409 206534 206613 206629 207868
## [217] 208480 209154 210911 211476 211633 212323 212763 215273 216292
## [226] 216732 217955 219132 219775 222254 223227 223399 223995 225078
## [235] 227776 228357 228717 231902 231981 233298 233785 234397 235134
## [244] 238821 240170 243292 244343 244751 247167 248281 249536 249865
## [253] 251481 253160 254069 255387 256846 258603 258980 260909 262133
## [262] 263153 264455 265757 266432 266996 267091 267342 268032 268377
## [271] 269256 271060 271844 271954 272158 272754 272942 273680 273805
## [280] 275264 276111 276158 278951 286183 288913 289085 289603 289681
## [289] 289948 290466 290497 290544 290638 291078 293180 293588 296098
## [298] 297839 299079 299894 301667 302310 304538 307613 309135 310217
## [307] 311174 312100 312555 315818 316147 317983 319850 319960 320226
## [316] 324289 324682 325435 326596 327851 329404 329796 330518 332573
## [325] 333969 334110 334675 335302 335334 337907 341468 341625 342048
## [334] 342503 342519 343617 345845 345892 346221 346378 349375 351038
## [343] 351069 351257 351618 352214 352230 352810 353579 353814 354364
## [352] 354662 356450 356701 357423 358693 359572 359996 361141 361878
## [361] 362961 364498 365706 365784 367542 368263 368467 368953 369393
## [370] 369518 371903 374303 374413 376750 379041 379292 379480 380625
## [379] 381096 383967 384987 385128 385253 387199 387701 388532 390415
## [388] 391764 392752 396392 396674 398494 399655 401632 401946 404911
## [397] 406166 408111 411406 413116 413476 413492 414841 415610 415673
## [406] 418889 419783 421744 422387 423470 427690 428474 428788 429384
## [415] 429572 431878 431894 432381 432741 434404 434671 436052 436930
## [424] 438640 440852 443174 443833 445543 445810 446061 446233 446578
## [433] 446751 447174 447269 447833 448351 450218 452399 452524 452728
## [442] 453010 455709 456760 457670 458140 458940 461168 464243 466847
## [451] 468400 468651 468730 469216 472777 473530 474723 476480 476527
## [460] 476558 476856 477060 478849 480229 480386 482049 482065 483226
## [469] 483492 483634 484481 485453 486159 487571 487634 487854 488246
## [478] 490034 490599 490991 493093 494725 496702 497486 498098 498255
## [487] 498616 499133 502522 504138 505581 505769 507479 507668 507699
## [496] 507887 508123 508531 509660 509723 510209 512719 512829 514021
## [505] 515182 515324 516814 517771 519575 519669 520705 521238 521740
## [514] 522415 524893 524940 526148 526227 527670 527858 528533 529553
## [523] 530337 530619 531043 531812 532533 535169 535420 537256 537726
## [532] 538903 541664 541978 547045 548394 549947 551642 555234 557603
## [541] 557760 558011 558199 562796 563125 563847 563972 564067 565918
## [550] 566420 566561 567330 567816 568036 568647 570106 571644 572240
## [559] 572303 573730 574170 575393 576413 577213 579017 579692 580351
## [568] 580665 582516 583661 585104 585214 585418 586202 586736 588524
## [577] 589371 589418 590736 591034 591834 592211 595270 596259 597608
## [586] 598973 601012 601781 602189 602863 603208 603804 604463 604605
## [595] 606848 607303 609358 609640 610597 610801 611099 611507 611868
## [604] 612339 612825 615571 621626 621893 624434 624528 624779 625360
## [613] 625470 625815 630161 631117 631243 631353 633486 636389 637816
## [622] 638695 640170 640264 640797 641111 642083 644876 646131 646523
## [631] 646664 647119 647229 647370 647904 647935 649959 651167 654728
## [640] 654885 655277 655309 655764 655779 659105 659152 660376 664110
## [649] 664878 666573 666839 667075 667922 669349 669710 670683 670887
## [658] 673256 677397 678966 679045 679327 679955 681523 681727 682778
## [667] 686544 686842 687673 689116 690011 690183 692599 692740 692772
## [676] 694356 694733 695345 697227 698388 698514 699486 699533 699957
## [685] 700459 701055 703659 703816 705024 707597 707911 709056 709652
## [694] 711252 711754 711864 712100 714139 714861 715206 718485 719112
## [703] 719316 723191 724446 724666 725356 726376 728274 728933 731741
## [712] 734832 735004 735647 736479 736730 738691 740291 741405 742566
## [721] 742691 743366 745139 745154 747664 750175 750190 751900 754112
## [730] 755603 755932 756434 757093 757642 758270 759070 759383 759493
## [739] 759838 760435 761093 764859 765988 766271 767431 768498 769722
## [748] 770553 770930 772279 774899 776327 777378 777848 778256 779182
## [757] 781645 786791 787983 789818 790116 792109 793489 794446 795325
## [766] 796800 796894 797427 798714 800612 801114 801506 801537 802761
## [775] 803295 803498 804189 806589 809836 809962 811515 814198 815578
## [784] 816849 817006 817398 820740 821383 822920 822983 825838 825980
## [793] 826089 826654 827517 828443 828490 833965 834498 835957 836585
## [802] 837824 838201 839409 839487 840930 841119 843174 843597 843880
## [811] 844303 844429 845009 845072 845292 845747 845794 846813 848429
## [820] 848680 849229 849323 850531 850986 851363 851426 852163 854924
## [829] 856054 856163 858140 861576 863019 863207 864227 864541 864902
## [838] 865561 865969 867098 867161 868494 871020 871459 874252 875115
## [847] 875742 877013 877107 877233 877327 879178 879821 881076 881296
## [856] 881986 882378 882927 884904 885296 885563 886991 888042 888057
## [865] 888763 889971 890473 891462 892858 893925 895321 896921 899416
## [874] 899996 903996 904295 905095 906805 907620 908530 909079 909519
## [883] 910476 912233 912609 914272 914900 915041 915198 915449 917818
## [892] 917865 920563 921410 921489 922744 922901 923858 924062 924768
## [901] 925128 925787 926085 926352 927199 927638 931608 932831 934008
## [910] 934478 934651 934886 935153 937130 937789 938040 938275 938730
## [919] 939954 942652 943232 943421 944378 946778 946856 948174 949649
## [928] 951077 953430 953524 953696 954057 955046 955344 958136 958168
## [937] 959391 959783 959925 960380 961164 962042 963156 963219 966357
## [946] 968537 973479 973636 977370 979331 979833 981872 982217 982610
## [955] 982908 983253 984147 985120 985935 987630 988555 988681 992587
## [964] 993215 999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
## [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
## [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
## [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
Plot the output
# Cluster scatter plot with "norm" ellipses around each cluster.
fviz_cluster(
  clara_clust_4,
  geom = "point",
  ellipse.type = "norm"
)
# Same partition with a custom palette, "t" ellipses and a classic theme.
fviz_cluster(
  clara_clust_4,
  palette = c("#00AFBB", "#FC4E07", "#E7B800", "#7E57C2"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)
# Silhouette plot and per-cluster average silhouette widths.
fviz_silhouette(clara_clust_4)
## cluster size ave.sil.width
## 1 1 36 0.58
## 2 2 383 0.51
## 3 3 446 0.29
## 4 4 135 0.28
5 clusters and 6 samples specified
# CLARA partition of the standardised coordinates into 5 clusters,
# using 6 sub-samples of 1000 rows each and Euclidean distances.
clara_clust_5 <- clara(
  x = geo_z,
  k = 5,
  metric = "euclidean",
  stand = FALSE, # geo_z was already standardised with scale()
  samples = 6,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_5)
## [1] "clara" "partition"
# Print the medoids, objective value, cluster sizes and the best sample.
print(clara_clust_5)
## Call: clara(x = geo_z, k = 5, metric = "euclidean", stand = FALSE, samples = 6, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE)
## Medoids:
## Lat Lon
## [1,] -2.3077290 3.2533604
## [2,] 0.4280980 -0.1780550
## [3,] 0.9914285 0.3072476
## [4,] -1.3745597 -0.0305779
## [5,] -0.3238258 -0.4507162
## Objective function: 0.4873376
## Clustering vector: int [1:1028136] 1 2 2 2 3 4 1 5 4 2 1 3 4 1 5 5 3 5 ...
## Cluster sizes: 35189 328853 180843 115904 367347
## Best sample:
## [1] 2558 2887 3609 4236 6590 6699 8268 9570 10480
## [10] 11029 11421 13335 13398 16144 16536 16865 17257 19313
## [19] 19956 21603 22042 22074 23815 25415 26529 27690 27816
## [28] 28490 29761 30357 32961 33118 35299 36115 37777 37809
## [37] 38954 38970 40727 41056 41103 42766 43394 43441 44508
## [46] 45433 46312 47881 48618 49700 49983 51238 52556 53622
## [55] 54281 54846 55678 55693 56133 57403 58643 61043 61451
## [64] 62502 62973 63380 64306 65781 66032 66769 67365 71727
## [73] 74441 75272 77155 79492 81924 82551 83414 85736 86395
## [82] 86662 87195 87885 88372 88623 89313 91650 91713 92905
## [91] 94851 94961 99322 100216 100232 100702 101581 101973 102130
## [100] 102350 104499 105629 106523 110963 111778 112641 113614 114430
## [109] 115214 116124 116312 119120 119481 121081 121411 121709 122791
## [118] 122948 128298 129553 130133 130416 130871 132283 132800 133491
## [127] 134008 134353 134448 135656 136550 136958 139138 139264 141178
## [136] 141288 142449 143264 143500 144880 145680 146700 147908 148143
## [145] 149351 149665 150685 150983 151093 152222 152285 153587 155140
## [154] 155391 155470 155956 157744 159376 159517 160866 162137 163220
## [163] 163267 163800 167110 167126 167502 168052 168789 169965 170232
## [172] 170420 171221 172899 173166 173182 173888 174311 174374 175095
## [181] 176586 177731 177982 179833 180445 181465 184226 184540 184838
## [190] 185356 185873 189262 191929 192509 192745 194204 194408 194439
## [199] 194627 195270 195600 196949 197733 200024 200322 200761 202063
## [208] 202942 203554 203569 204511 206315 206331 206409 206534 206613
## [217] 206629 207868 208480 209154 210911 211476 211633 212323 212763
## [226] 215273 216292 216732 217955 219132 219775 222254 223227 223399
## [235] 223995 225078 227776 228357 228717 231902 231981 233298 233785
## [244] 234397 235134 238821 240170 243292 244343 244751 247167 248281
## [253] 249536 249865 251481 253160 254069 255387 256846 258603 258980
## [262] 260909 262133 263153 264455 265757 266432 266996 267091 267342
## [271] 268032 268377 269256 271060 271844 271954 272158 272754 272942
## [280] 273680 273805 275264 276111 276158 278951 280865 286183 288913
## [289] 289085 289603 289681 289948 290466 290497 290544 290638 291078
## [298] 293180 293588 296098 297839 299079 299894 301667 302310 304538
## [307] 307613 309135 310217 311174 312100 312555 315818 316147 317983
## [316] 319850 319960 320226 324289 324682 325435 326596 327851 329404
## [325] 329796 330518 332573 333969 334110 334675 335302 335334 337907
## [334] 341468 341625 342048 342503 342519 343617 345845 345892 346221
## [343] 346378 349375 351038 351069 351257 351618 352214 352230 353579
## [352] 353814 354364 354662 356450 356701 357423 358693 359572 359996
## [361] 361141 361878 362961 364498 365706 365784 367542 368263 368467
## [370] 368953 369393 369518 371903 374303 374413 376750 379041 379292
## [379] 379480 380625 381096 383967 384987 385128 385253 387199 387701
## [388] 388532 390415 391764 392752 396392 396674 398494 399655 401632
## [397] 401946 404911 406166 408111 411406 413116 413476 413492 414841
## [406] 415610 415673 418889 419783 421744 422387 423470 427690 428474
## [415] 428788 429384 429572 431878 431894 432381 432741 434404 434671
## [424] 436052 436930 438640 440852 443174 443833 445543 445810 446061
## [433] 446233 446578 446751 447174 447269 447833 450218 452399 452524
## [442] 452728 453010 455709 456760 457670 458140 458940 461168 464243
## [451] 468400 468651 468730 469216 472777 473530 474723 476480 476527
## [460] 476558 476856 477060 478849 480229 480386 481186 482049 482065
## [469] 483226 483492 483634 484481 485453 486159 487571 487634 487854
## [478] 487885 488246 490034 490599 490991 493093 494725 496702 497486
## [487] 498098 498255 498616 499133 502522 504138 505581 505769 507479
## [496] 507668 507699 507887 508123 508531 509660 509723 510209 512719
## [505] 512829 514021 515182 515324 516814 517771 519575 519669 520705
## [514] 521238 521740 522415 524893 524940 526148 526227 527670 527858
## [523] 528533 529553 530337 530619 531043 531812 532533 535169 535420
## [532] 537256 537726 538903 541664 541978 547045 548394 549947 551642
## [541] 555234 557603 557760 558011 558199 562796 563125 563847 563972
## [550] 564067 565918 566420 566561 567330 567816 568036 568647 570106
## [559] 571644 572240 572303 573730 574170 575393 576413 577213 579017
## [568] 579692 580351 580665 582516 583661 585104 585214 585418 586202
## [577] 586736 588524 589371 589418 590736 591034 591834 592211 595270
## [586] 596259 597608 598973 601012 601781 602189 602863 603208 603804
## [595] 604463 604605 606848 607303 609358 609640 610597 610801 611099
## [604] 611507 611868 612339 612825 615571 621626 621893 624434 624528
## [613] 624779 625360 625470 625815 630161 631117 631243 631353 633486
## [622] 636389 637816 638695 640170 640264 640797 641111 642083 644876
## [631] 646131 646523 646664 647119 647229 647370 647904 647935 649959
## [640] 651167 654728 654885 655277 655309 655764 655779 659105 659152
## [649] 660376 664110 664878 666573 666839 667075 667922 669349 669710
## [658] 670683 670887 673256 677397 678966 679045 679327 679955 681523
## [667] 681727 682778 686544 686842 687673 689116 690011 690183 692599
## [676] 692740 692772 694356 694733 695345 697227 698388 698514 699486
## [685] 699533 699957 700459 701055 703659 703816 707597 707911 709056
## [694] 709652 711252 711754 711864 712100 714139 714861 715206 718485
## [703] 719112 719316 723191 724446 724666 725356 726376 728274 728933
## [712] 731741 734832 735004 735647 736479 736730 738691 740291 741405
## [721] 742566 742691 743366 744637 745139 745154 747664 750175 750190
## [730] 751900 754112 755603 755932 756434 757093 757642 758270 759070
## [739] 759383 759493 759838 760435 761093 764859 765988 766271 767431
## [748] 768498 769722 770553 770930 772279 776327 777378 777848 778256
## [757] 779182 781645 786791 787983 789818 790116 792109 793489 794446
## [766] 795325 796800 796894 797427 798714 800612 801114 801506 801537
## [775] 802761 803295 803498 804189 806589 809836 809962 811515 814198
## [784] 815578 816849 817006 817398 820740 821383 822920 822983 825838
## [793] 825980 826089 826654 827517 828443 828490 833965 834498 835957
## [802] 836585 837824 838201 839409 839487 840930 841119 843174 843597
## [811] 843880 844303 844429 845009 845072 845292 845747 845794 846813
## [820] 848429 848680 849229 849323 850531 850986 851363 851426 852163
## [829] 854924 856054 856163 858140 861576 863019 863207 864227 864541
## [838] 864902 865561 865969 867098 867161 868494 871020 871459 874252
## [847] 875115 875742 877013 877107 877233 877327 879178 879821 881076
## [856] 881296 881986 882378 882927 884904 885296 885563 886991 888042
## [865] 888057 888763 889971 890473 891462 892858 893925 895321 896921
## [874] 899416 899996 903996 904295 905095 906805 907620 908530 909079
## [883] 909519 910476 912233 912609 914272 914900 915041 915198 915449
## [892] 917818 917865 920563 921410 921489 922744 922901 923858 924062
## [901] 924768 925128 925787 926085 926352 927199 927638 931608 932831
## [910] 934008 934478 934651 934886 935153 937130 937789 938040 938275
## [919] 938730 939954 942652 943232 943421 946778 946856 948174 949649
## [928] 951077 953430 953524 953696 954057 955046 955344 958136 958168
## [937] 959391 959783 959925 960380 961164 962042 963156 963219 966357
## [946] 968537 973479 973636 977370 979331 979833 981872 982217 982610
## [955] 982908 983253 984147 985120 985935 987630 988555 988681 992587
## [964] 993215 999804 1000102 1001059 1002377 1003443 1003789 1003961 1004557
## [973] 1004808 1005342 1005373 1005514 1005859 1005954 1007993 1008056 1008605
## [982] 1012746 1012794 1013217 1014315 1014770 1016543 1016920 1017076 1019414
## [991] 1020857 1021171 1022316 1024513 1025093 1025124 1025360 1027148 1027399
## [1000] 1028121
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
Plot the output
# Cluster scatter plot with "norm" ellipses around each cluster.
fviz_cluster(
  clara_clust_5,
  geom = "point",
  ellipse.type = "norm"
)
# Same partition with a custom palette, "t" ellipses and a classic theme.
fviz_cluster(
  clara_clust_5,
  palette = c("#00AFBB", "#FC4E07", "#E7B800", "#7E57C2", "#4DB6AC"),
  ellipse.type = "t",
  geom = "point",
  pointsize = 1,
  ggtheme = theme_classic()
)
# Silhouette plot and per-cluster average silhouette widths.
fviz_silhouette(clara_clust_5)
## cluster size ave.sil.width
## 1 1 37 0.59
## 2 2 325 0.59
## 3 3 201 -0.09
## 4 4 123 0.23
## 5 5 314 0.33
6 clusters and 10 samples specified
# CLARA partition of the standardised coordinates into 6 clusters.
# This run draws 10 sub-samples (of 1000 rows each) instead of 6.
clara_clust_6 <- clara(
  x = geo_z,
  k = 6,
  metric = "euclidean",
  stand = FALSE, # geo_z was already standardised with scale()
  samples = 10,
  sampsize = 1000,
  trace = 0,
  medoids.x = TRUE,
  rngR = FALSE,
  pamLike = FALSE,
  correct.d = TRUE
)
class(clara_clust_6)
## [1] "clara" "partition"
# Print the medoids, objective value, cluster sizes and the best sample.
print(clara_clust_6)
## Call: clara(x = geo_z, k = 6, metric = "euclidean", stand = FALSE, samples = 10, sampsize = 1000, trace = 0, medoids.x = TRUE, rngR = FALSE, pamLike = FALSE, correct.d = TRUE)
## Medoids:
## Lat Lon
## [1,] -2.3077290 3.255075201
## [2,] 0.3938083 -0.190058975
## [3,] 0.9595881 0.187208083
## [4,] -1.3280237 0.008863656
## [5,] -0.2919854 -0.402700407
## [6,] 0.7562992 1.823175233
## Objective function: 0.4406954
## Clustering vector: int [1:1028136] 1 2 2 2 3 4 1 5 1 2 1 3 4 1 5 5 3 5 ...
## Cluster sizes: 30283 313464 153436 118516 365182 47255
## Best sample:
## [1] 157 393 1240 2667 3028 3279 4001 5272 6574
## [10] 7719 8456 9539 10715 12284 12363 14841 15045 16097
## [19] 18481 20881 20991 23329 24050 25619 26058 26090 27204
## [28] 27674 30545 31565 31706 31832 33777 34373 36977 36993
## [37] 38342 42970 45072 47457 48524 52634 52744 56509 57984
## [46] 59694 60055 61419 62188 62251 65467 66361 68322 68965
## [55] 70048 73609 75052 75366 76151 76684 78457 78472 79320
## [64] 80983 83508 85218 87430 88921 89752 90411 92388 92639
## [73] 92811 93156 93329 93753 94412 96796 98977 99102 99306
## [82] 99589 100750 101816 102287 103338 104248 104719 105519 105597
## [91] 109645 110821 114979 115308 115794 120109 121301 123058 123136
## [100] 123435 125427 126807 126964 127764 128643 130118 130212 131059
## [109] 132032 134150 134432 134824 136079 136613 137177 139672 139907
## [118] 143155 143280 144676 144833 145194 145712 147516 149100 150716
## [127] 152159 152348 154058 154466 154701 155109 156238 156301 156787
## [136] 159298 159407 159972 160835 161761 163392 166153 167283 167816
## [145] 168993 169275 169903 171142 171472 171519 172727 172805 174248
## [154] 174437 175111 176915 177198 177621 178327 178390 179065 179112
## [163] 180131 181747 181998 182547 183834 183849 184304 184681 185481
## [172] 188242 188556 189481 193623 194972 196525 197545 197859 198220
## [181] 199287 201812 204338 204777 207570 208433 209060 209672 210425
## [190] 210551 210645 212496 213139 214394 214614 215226 215304 218222
## [199] 218881 220309 220748 221972 223791 224780 225596 226270 226929
## [208] 227243 228639 229094 230239 231683 231792 231996 232734 233314
## [217] 235103 235997 237315 237613 238413 238789 240123 241848 242837
## [226] 244186 245551 247590 248218 248359 248767 249441 249787 250383
## [235] 251042 251183 253881 254807 255936 256219 257176 257380 258086
## [244] 258446 258917 259403 259670 264926 266149 267797 268204 268471
## [253] 270448 271013 271107 271358 271593 272048 272393 273272 276739
## [262] 277696 277821 277931 280065 282967 284395 285273 286748 286842
## [271] 287375 288662 291454 291486 292505 292709 293101 293243 293698
## [280] 293807 293949 294482 294513 296537 301463 301855 302357 305683
## [289] 306797 306954 310688 313151 315190 315928 316288 316571 317465
## [298] 318438 319834 321999 323976 325544 325623 325905 326533 328306
## [307] 329357 333122 333420 334251 335695 336589 336761 338126 339177
## [316] 339319 339350 340935 341311 341374 341923 344966 346065 346112
## [325] 346535 347037 347633 348088 350238 350394 354175 354489 355634
## [334] 356230 357831 358442 358678 359258 360466 360717 361439 365063
## [343] 365690 365894 369722 369769 371024 371244 371934 372483 372876
## [352] 374852 375511 378319 381410 383057 383308 383496 385269 386869
## [361] 387983 389144 389270 389944 391215 391717 394243 396753 396768
## [370] 398479 400408 400424 400691 402181 402510 402557 403671 404220
## [379] 404848 405648 405962 407672 407766 411437 412566 412692 412849
## [388] 414010 415077 416300 417132 417508 417587 418857 422905 423956
## [397] 424427 424835 425760 427486 428223 433369 434561 435895 436397
## [406] 436726 438687 440068 440946 441025 441903 443378 443472 444005
## [415] 445292 447190 447849 448084 448116 449339 449826 449873 450077
## [424] 450767 453167 456305 456415 456540 460776 461686 462157 463427
## [433] 463584 467318 472417 472558 472668 473232 474095 475021 475068
## [442] 475884 477578 480543 480574 481076 482535 482865 483163 484245
## [451] 484402 486065 487509 489752 490176 491007 491587 491650 491870
## [460] 492325 493392 493737 495007 495462 495808 495902 497110 497941
## [469] 498004 498741 501502 502632 502742 504718 508154 509362 509597
## [478] 509786 510805 511119 512139 512547 513676 513739 515041 515073
## [487] 516845 518038 519199 520830 520971 521693 522320 523591 523685
## [496] 523905 524721 525254 525756 526399 527654 528564 528957 529506
## [505] 531420 531482 531686 531875 533569 534353 534620 534636 535342
## [514] 536550 538040 539185 539436 540503 541899 542919 543499 545680
## [523] 545994 546574 550873 551673 553383 554199 555658 555862 555893
## [532] 557054 558811 559188 560850 561478 561619 561776 562027 562215
## [541] 563517 564396 564443 565965 567141 567785 567863 567989 568067
## [550] 568083 569322 569934 570436 570640 571346 571707 572366 572664
## [559] 572930 573777 574217 578186 579409 580586 581057 581229 581465
## [568] 581731 583708 584367 584681 584853 586532 589230 589811 589999
## [577] 593356 593435 594752 595851 596227 597655 600008 600102 600275
## [586] 600635 601624 604746 605797 605969 606362 607742 608621 609735
## [595] 609797 610990 611319 612935 614614 615116 615524 620057 620214
## [604] 620434 624607 625909 626411 628451 628796 629486 629831 630725
## [613] 631698 632514 634208 634396 635134 635259 637565 639165 639793
## [622] 646382 646680 647637 648955 650022 650367 650539 651136 651387
## [631] 651920 651951 652092 652438 652532 654571 654634 655042 655183
## [640] 659325 659372 659795 661348 663121 663764 665992 667435 667749
## [649] 668894 669067 670589 671091 671671 671703 671938 673554 673726
## [658] 674699 677272 677601 678323 678951 681304 681414 682982 684284
## [667] 685194 685743 686136 688050 688112 689305 690858 691250 691579
## [676] 691972 694027 694670 696757 696788 698529 699361 701243 702404
## [685] 702530 702922 703502 703957 704475 705071 707675 707832 710013
## [694] 710829 712492 712523 712711 713668 713684 715269 715771 715818
## [703] 716116 718108 718155 718877 719222 720148 721026 722595 723332
## [712] 724415 724697 725952 728996 729560 729717 730392 730408 730847
## [721] 733357 735757 737216 737687 739020 740495 740746 741483 742079
## [730] 745421 746441 746707 749155 749986 751869 753218 754206 757266
## [739] 758128 759948 760450 761109 761376 763086 763337 763400 764027
## [748] 766365 766522 767620 769565 774570 774930 774946 776295 776687
## [757] 777064 780343 781237 783198 785677 788328 789144 789928 790242
## [766] 790838 791026 793348 793835 794195 796125 797506 803012 804267
## [775] 804628 805130 806997 807515 807687 808032 808205 808723 809162
## [784] 811264 811672 813853 813978 815892 817163 817979 818214 819594
## [793] 820395 821414 822622 822858 825399 825697 826936 826999 828301
## [802] 829854 830105 830184 830670 832459 834231 836851 837934 837981
## [811] 838514 841840 842217 842766 843503 844680 844946 845135 845935
## [820] 847613 847880 847896 848602 849025 849088 849308 849810 852053
## [829] 852445 852696 854548 856179 858940 859552 859709 860070 860587
## [838] 863976 865592 867035 867224 867459 868918 869122 869153 869341
## [847] 869577 869985 870314 871114 871177 871663 872448 875036 875475
## [856] 876778 877656 878268 879225 881029 881045 881123 881249 881343
## [865] 882582 883194 883869 885626 886347 886394 887038 887477 887602
## [874] 889312 889987 891007 892074 892497 893987 894489 896874 897941
## [883] 898710 899180 902490 903071 903432 906616 906695 908013 908499
## [892] 909111 909848 913096 913535 914884 919057 919214 919465 921881
## [901] 922995 924250 924579 925426 926195 927874 928784 929490 930101
## [910] 931560 933694 933757 935624 936847 937867 939169 940471 941146
## [919] 941805 942056 942746 943970 945774 946558 946668 946872 947468
## [928] 947656 948394 949978 950825 950873 952190 953665 956724 959062
## [937] 963627 963800 964317 964396 964662 965180 965259 965353 965917
## [946] 968302 970812 971095 972554 973793 977025 979252 982327 983849
## [955] 984931 985888 986234 986814 986924 987269 990862 992572 992697
## [964] 992807 994564 994674 994940 1000149 1001310 1002565 1003538 1004118
## [973] 1004510 1005232 1006330 1007287 1008118 1008573 1008683 1008824 1009389
## [982] 1010048 1010801 1012621 1016182 1016339 1016763 1017218 1017233 1018331
## [991] 1020559 1020606 1020936 1021093 1024089 1025564 1025752 1025783 1025972
## [1000] 1026332
##
## Available components:
## [1] "sample" "medoids" "i.med" "clustering" "objective"
## [6] "clusinfo" "diss" "call" "silinfo" "data"
Plot the output
# Cluster scatter plot of the 6-cluster CLARA fit with "norm" ellipses.
fviz_cluster(clara_clust_6, geom = "point", ellipse.type = "norm")
# Same fit with an explicit six-colour palette, "t" ellipses, small points
# and the classic ggplot theme.
fviz_cluster(
  clara_clust_6,
  geom = "point",
  pointsize = 1,
  ellipse.type = "t",
  palette = c(
    "#00AFBB", "#FC4E07", "#E7B800", "#7E57C2", "#4DB6AC", "#F06292"
  ),
  ggtheme = theme_classic()
)
# Silhouette plot; also prints the size and average silhouette width
# of each cluster.
fviz_silhouette(clara_clust_6)
## cluster size ave.sil.width
## 1 1 34 0.68
## 2 2 281 0.50
## 3 3 159 0.23
## 4 4 132 0.28
## 5 5 350 0.30
## 6 6 44 0.44
The iteration with three clusters attains the highest average silhouette
score, so we continue the evaluation with three clusters.
# Medoid coordinates (Lat, Lon) of the three-cluster CLARA solution.
clara_clust_3$medoids
## Lat Lon
## [1,] -1.3451686 0.20092689
## [2,] -0.2087105 -0.40955981
## [3,] 0.6191405 0.01229336
# Indices of the medoid observations; used below to pick the medoid rows
# out of uber_data.
clara_clust_3$i.med
## [1] 585418 443174 77155
# Attach the weekday name, the geo_z columns and the 3-cluster CLARA labels
# to every trip, then give all ten columns explicit names.
uber_data$Weekday <- weekdays(uber_data$Date)
uber_data <- cbind(uber_data, geo_z, clara_clust_3$clustering)
colnames(uber_data) <- c(
  "Date.Time", "Lat", "Lon", "Base", "Date", "Time", "Weekday",
  "Lat_z", "Lon_z", "cluster"
)
uber_data$cluster <- as.factor(uber_data$cluster)
# Rows of the medoid trips, without the first (Date.Time) column.
medoids <- uber_data[clara_clust_3$i.med, -1]
medoids
## Lat Lon Base Date Time Weekday Lat_z
## 585418 40.6843 -73.9601 B02617 2014-09-25 10:34:00 Thursday -1.3451686
## 443174 40.7307 -73.9957 B02617 2014-09-13 18:30:00 Saturday -0.2087105
## 77155 40.7645 -73.9711 B02598 2014-09-06 14:23:00 Saturday 0.6191405
## Lon_z cluster
## 585418 0.20092689 1
## 443174 -0.40955981 2
## 77155 0.01229336 3
Two medoids were registered in the evening and one in the early morning. This suggests a possible bias in the clustering method, because early morning on a weekday is not a popular time for taxi rides. Each medoid falls on a different weekday and a different day of September, and belongs to a different base.
Plot the points on the map.
Build a data frame with the medoid coordinates and identify the places at those latitudes and longitudes.
# Latitude / longitude of the medoid trips (original, unscaled columns).
locations <- data.frame(lat = c(medoids$Lat),lon = c(medoids$Lon))
# Reverse-geocode each row; revgeocode() is given c(lon, lat).
# NOTE(review): the lookup appears to need a Google Cloud project with
# billing enabled - when it fails, address is left as NA.
locations$address <- apply(locations, 1, function(row) {revgeocode(c(row['lon'], row['lat']))})
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
## Warning: Reverse geocoding failed with error:
## You must enable Billing on the Google Cloud Project at https://console.cloud.google.com/project/_/billing/enable Learn more at https://developers.google.com/maps/gmp-get-started
# Show the medoid coordinates together with the looked-up addresses.
print(locations)
## lat lon address
## 1 40.6843 -73.9601 <NA>
## 2 40.7307 -73.9957 <NA>
## 3 40.7645 -73.9711 <NA>
The places at these coordinates are:
- cluster 1: John F. Kennedy International Airport;
- cluster 2: Midtown Manhattan, near Central Park;
- cluster 3: Greenwich Village or Lower Manhattan, near New York University.
These destinations are quite reasonable, especially the places near the park and the airport.
# Number of trips assigned to each cluster.
uber_data %>%
group_by(cluster) %>%
summarize(counts = n())
## # A tibble: 3 × 2
## cluster counts
## <fct> <int>
## 1 1 155513
## 2 2 446220
## 3 3 426403
# Daily number of pickups per cluster, drawn as one line per cluster.
uber_data %>%
  group_by(cluster, Date) %>%
  # Drop the grouping explicitly instead of relying on summarize()'s default.
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Date, color = cluster)) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  theme_minimal() +
  labs(x = "", y = "", title = "Number of Uber assigned clusters by Date - Sep 2014")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
We can observe that clusters 2 and 3 follow nearly the same pickup trend; however, the second cluster has fewer pickups (494 041 vs 446 087). The first cluster has the smallest number of pickups (36 600). It seems that destinations near the parks (clusters 2 and 3) are much more popular than those toward the airport (cluster 1). Additionally, cluster 1 is stable over time compared with clusters 2 and 3.
# Pickups per cluster for each weekday, as dodged bars.
uber_data %>%
  # Order the weekdays Monday-Sunday instead of alphabetically. The labels
  # are produced by weekdays() itself (2014-09-01 is a Monday), so they match
  # the values already stored in Weekday.
  mutate(
    Weekday = factor(Weekday, levels = weekdays(as.Date("2014-09-01") + 0:6))
  ) %>%
  group_by(cluster, Weekday) %>%
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Weekday, fill = cluster)) +
  geom_col(position = position_dodge()) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 12)) +
  labs(x = "", y = "", title = "Number of Uber pickups by Weekday - Sep 2014")
Cluster 3 is popular on Fridays and Saturdays. We can guess that this area is popular for evening activities (we can check this assumption later). For the second cluster we observe peaks on Tuesday and Thursday and the lowest values during the weekend. Again, the first cluster is quite stable throughout the week.
# NOTE(review): this overwrites uber_data with only the trips whose pickup
# time formats as 00:00, 10:00 or 20:00, so every later step that reads
# uber_data works on that subset - confirm this is intended.
uber_data <- uber_data %>%
  mutate(Time = as.POSIXct(Time, format = "%H:%M")) %>%
  filter(format(Time, "%H:%M") %in% c("00:00", "10:00", "20:00"))
# Pickups per cluster at each retained time of day.
uber_data %>%
  group_by(cluster, Time) %>%
  summarize(counts = n(), .groups = "drop") %>%
  ggplot(aes(y = counts, x = Time, color = cluster)) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 0.9, alpha = 0.7) +
  theme_minimal() +
  labs(x = "Time", y = "Number of Pickups", title = "Number of Uber Pickups by Cluster and Time - Sep 2014") +
  scale_x_datetime(date_labels = "%H:%M", date_breaks = "10 hours")
All three clusters show an upward trend toward the evening, with orders starting to rise from the first half of the day—most noticeably in clusters 2 and 3.
Select one date, ‘2014-09-14’, and split its data 95% : 5% into training and test sets.
# Fixed seed so the random train/test split is reproducible.
set.seed(111)
# Trips of 2014-09-14, keeping columns 2:4 and 6 (renamed below to
# Lat, Lon, Base and Time).
uber_fr <- uber_data[uber_data$Date == "2014-09-14", c(2:4, 6)]
# Standardize latitude and longitude within this one-day subset.
uber_z_fr <- as.data.frame(lapply(uber_fr[, 1:2], scale))
# Logical mask over the rows: TRUE = training row, FALSE = test row.
split <- sample.split(seq_len(nrow(uber_z_fr)), SplitRatio = 0.95)
training_set <- subset(uber_z_fr, split == TRUE)
test_set <- subset(uber_z_fr, split == FALSE)
# Keep raw values, z-scores and the split flag side by side.
uber_fr <- cbind(uber_fr, uber_z_fr, split)
colnames(uber_fr) <- c("Lat", "Lon", "Base", "Time", "Lat_z", "Lon_z", "split")
# KCCA of the "kmeans" family with 3 clusters, fitted on the training rows'
# standardized coordinates (columns 5:6).
pred_kcca <- kcca(uber_fr[split == TRUE, 5:6], k = 3, kccaFamily("kmeans"))
pred_kcca
## kcca object of family 'kmeans'
##
## call:
## kcca(x = uber_fr[split == TRUE, 5:6], k = 3, family = kccaFamily("kmeans"))
##
## cluster sizes:
##
## 1 2 3
## 14 3 57
# Cluster labels for the training rows (no newdata) and for the held-out rows.
pred_train <- predict(pred_kcca)
pred_test <- predict(pred_kcca, newdata = uber_fr[!split, 5:6])
# image() plot of the fitted model, overlaid with the training points
# coloured by predicted cluster and the test points drawn with pch 22
# and a grey background.
image(pred_kcca)
points(uber_fr[split, 5:6], col = pred_train, pch = 19, cex = 0.3)
points(uber_fr[!split, 5:6], col = pred_test, pch = 22, bg = "grey")