uber.knit

options(repos = c(CRAN = "https://cloud.r-project.org"))

install.packages("factoextra")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'factoextra' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("flexclust")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'flexclust' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'flexclust'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\flexclust\libs\x64\flexclust.dll
## to
## C:\Users\mevin\AppData\Local\R\win-library\4.5\flexclust\libs\x64\flexclust.dll:
## Permission denied

## Warning: restored 'flexclust'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("fpc")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'fpc' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("cluster")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'cluster' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'cluster'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\cluster\libs\x64\cluster.dll
## to C:\Users\mevin\AppData\Local\R\win-library\4.5\cluster\libs\x64\cluster.dll:
## Permission denied

## Warning: restored 'cluster'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("ClusterR")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'ClusterR' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'ClusterR'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\ClusterR\libs\x64\ClusterR.dll
## to
## C:\Users\mevin\AppData\Local\R\win-library\4.5\ClusterR\libs\x64\ClusterR.dll:
## Permission denied

## Warning: restored 'ClusterR'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("tidyverse")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'tidyverse' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("dendextend")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'dendextend' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("stats")

## Warning: package 'stats' is in use and will not be installed

install.packages("dbscan")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'dbscan' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'dbscan'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\dbscan\libs\x64\dbscan.dll
## to C:\Users\mevin\AppData\Local\R\win-library\4.5\dbscan\libs\x64\dbscan.dll:
## Permission denied

## Warning: restored 'dbscan'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("lubridate")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'lubridate' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'lubridate'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\lubridate\libs\x64\lubridate.dll
## to
## C:\Users\mevin\AppData\Local\R\win-library\4.5\lubridate\libs\x64\lubridate.dll:
## Permission denied

## Warning: restored 'lubridate'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

install.packages("forecast")

## Installing package into 'C:/Users/mevin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'forecast' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'forecast'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mevin\AppData\Local\R\win-library\4.5\00LOCK\forecast\libs\x64\forecast.dll
## to
## C:\Users\mevin\AppData\Local\R\win-library\4.5\forecast\libs\x64\forecast.dll:
## Permission denied

## Warning: restored 'forecast'

## 
## The downloaded binary packages are in
##  C:\Users\mevin\AppData\Local\Temp\Rtmpg3TrOe\downloaded_packages

library(dbscan)

## Warning: package 'dbscan' was built under R version 4.5.2

## 
## Attaching package: 'dbscan'

## The following object is masked from 'package:stats':
## 
##     as.dendrogram

library(stats)
library(factoextra)

## Warning: package 'factoextra' was built under R version 4.5.2

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.5.2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(flexclust)

## Warning: package 'flexclust' was built under R version 4.5.2

library(fpc)

## Warning: package 'fpc' was built under R version 4.5.2

## 
## Attaching package: 'fpc'

## The following object is masked from 'package:dbscan':
## 
##     dbscan

library(clustertend)

## Warning: package 'clustertend' was built under R version 4.5.2

## Package `clustertend` is deprecated.  Use package `hopkins` instead.

library(cluster)

## Warning: package 'cluster' was built under R version 4.5.2

library(ClusterR)

## Warning: package 'ClusterR' was built under R version 4.5.2

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.5.2

## Warning: package 'tidyr' was built under R version 4.5.2

## Warning: package 'readr' was built under R version 4.5.2

## Warning: package 'purrr' was built under R version 4.5.2

## Warning: package 'dplyr' was built under R version 4.5.2

## Warning: package 'stringr' was built under R version 4.5.2

## Warning: package 'forcats' was built under R version 4.5.2

## Warning: package 'lubridate' was built under R version 4.5.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dendextend)

## Warning: package 'dendextend' was built under R version 4.5.2

## 
## ---------------------
## Welcome to dendextend version 1.19.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## 
## Attaching package: 'dendextend'
## 
## The following object is masked from 'package:stats':
## 
##     cutree

library(lubridate)
library(forecast)

## Warning: package 'forecast' was built under R version 4.5.2

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Let’s load the uber.csv dataset.

uber <- read.csv("C:\\Users\\mevin\\Downloads\\uber-data.csv")

let’s display the structure of uber dataset.

str(uber)

## 'data.frame':    1028136 obs. of  4 variables:
##  $ Date.Time: chr  "9/1/2014 0:01:00" "9/1/2014 0:01:00" "9/1/2014 0:03:00" "9/1/2014 0:06:00" ...
##  $ Lat      : num  40.2 40.8 40.8 40.7 40.8 ...
##  $ Lon      : num  -74 -74 -74 -74 -73.9 ...
##  $ Base     : chr  "B02512" "B02512" "B02512" "B02512" ...

I started pre-processing by checking if there are any NA’s available and removed duplicate rows

colSums(is.na(uber))

## Date.Time       Lat       Lon      Base 
##         0         0         0         0

uber <- unique(uber)

I created a smaller dataframe to store the longitudes and latitudes.

location <- data.frame(uber$Lon,uber$Lat)

I set the seed at “123” and took a random sample of 5000 observations of “location” to proceed with clustering

set.seed(123)
sample <- sample(1:nrow(location),5000)
sample_location <- location[sample,]

In order to check if the dataset is clusterable or not, I calculated the Hopkin’s statistic.

# Hopkins stat
get_clust_tendency(sample_location, 2, graph=TRUE, 
                   gradient=list(low="red", mid="white", high="blue"),
                   seed = 123)

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## $hopkins_stat
## [1] 0.9818681
## 
## $plot

Since the Hopkin’s statistic is closer to 1 (0.9818681), the dataset is clusterable.

I tried to find the optimal number of clusters for k-means.

fviz_nbclust(sample_location, kmeans, method = "silhouette")

Since 2 is the optimal number of clusters, I applied k-means clustering with k = 2

kmean_clust <- kmeans(sample_location, 2)

kmean_clust$size

## [1] 4640  360

fviz_cluster(list(data=sample_location, cluster=kmean_clust$cluster), 
             ellipse.type="convex", geom="point", ggtheme=theme_classic(), 
             main = "K-Means Clustering Results (k=2)")

sil<-silhouette(kmean_clust$cluster, dist(sample_location))
fviz_silhouette(sil)

##   cluster size ave.sil.width
## 1       1 4640          0.73
## 2       2  360          0.29

After using k-means algorithm, we can see the clusters are close to each other, but they have an average silhouette width of 0.7 which shows that this method is good, but I wanted to compare with other clustering algorithms.

I attempted to proceed the same with CLARA.

fviz_nbclust(sample_location, clara, method = "silhouette", k.max = 10)

Since 2 is the optimal number of clusters, I applied CLARA clustering with k = 2

clara_res <- clara(sample_location, k = 2)

fviz_cluster(clara_res, data = sample_location, 
             geom = "point", ellipse.type = "convex", 
             ggtheme = theme_classic(), 
             main = "CLARA Clustering Results (k=2)")

fviz_silhouette(clara_res)

##   cluster size ave.sil.width
## 1       1   39          0.71
## 2       2    5          0.49

After using CLARA , we can see once again that the clusters a close to each other, but they have an average silhouette width of 0.69 which shows that while this method is good, CLARA is comparatively better than K-means.

I then proceeded with Hierarchical clustering.

fviz_nbclust(sample_location, hcut, 
             method = "silhouette", 
             k.max = 10, 
             hc_method = "ward.D2")

Since 2 is the optimal number of clusters, I applied hierarchical clustering with k = 2

hcut_res <- hcut(sample_location, k = 2, hc_method = "ward.D2")

fviz_cluster(hcut_res,data = sample_location,
             geom = "point",
             ellipse.type = "convex",
             ggtheme = theme_minimal(),
             main = "Hierarchical Clustering Results (k=2)")

fviz_silhouette(hcut_res)

##   cluster size ave.sil.width
## 1       1 4850          0.75
## 2       2  150          0.70

After using hierarchical clustering , we can see once again that the clusters a close to each other (with some more obvious intersections between the clusters), but they have an average silhouette width of 0.75 which shows that while this method is good and is the best algorithm so far.

Finally, I proceeded with DBSCAN. I found “eps” by plotting the k-nearest neighbors (k-NN) with k = 10. eps is assigned by locating the elbow point in the k-NN plot.

kNNdistplot(sample_location, k = 10)
abline(h = 0.05, lty = 2)

Now I proceed with DBSCAN with eps = 0.05

db_res <- fpc::dbscan(sample_location, eps = 0.05, MinPts = 10)
print(db_res)

## dbscan Pts=5000 MinPts=10 eps=0.05
##         0    1  2
## border 53   15  3
## seed    0 4903 26
## total  53 4918 29

fviz_cluster(db_res, data = sample_location, 
             geom = "point", ggtheme = theme_classic(), 
             main = "DBSCAN Clustering Results")

The clusters in DBSCAN are more defined and separate from each other with significant gap, compared to the other clustering algorithms.

I extracted the cluster results from the model.

cluster_results_vector <- db_res$cluster

Since clustering was performed on the sampled location data, I matched the cluster assignments back to the corresponding subset of the original Uber dataset using the same sampled indices. This allowed me to reconnect the clustering results with the original trip information, including timestamps.

uber_sample <- uber[sample, ]

I combined the sampled Uber records with their assigned cluster labels into a new dataframe. This dataframe now includes both spatial and temporal information, making it possible to analyze how trip clusters vary over time.

uber_sample_with_clusters <- cbind(uber_sample, cluster = cluster_results_vector)

I converted the Date.Time column into a proper date-time format and created additional columns for the hour of the day and the day of the week.

uber_sample_with_clusters$Date.Time <- mdy_hms(uber_sample_with_clusters$Date.Time)

uber_analysis_data <- uber_sample_with_clusters %>%
  mutate(
    Hour = hour(Date.Time),
    DayOfWeek = wday(Date.Time, label = TRUE, abbr = FALSE)
  )

DBSCAN often assigns some observations as noise, represented by cluster “0.” I filtered out all noise points and retained only valid cluster observations.

print(paste("Original sample size:", nrow(uber_analysis_data)))

## [1] "Original sample size: 5000"

uber_analysis_data_filtered <- uber_analysis_data %>%
  filter(cluster != 0)
  
print(paste("Sample size after filtering noise (Cluster 0):", nrow(uber_analysis_data_filtered)))

## [1] "Sample size after filtering noise (Cluster 0): 4947"

Next, I analyzed how the cluster centers shift across different hours of the day. By grouping the data by cluster and hour, I calculated the mean latitude and longitude for each cluster at every hour, showing how the spatial center of Uber activity changes throughout the day.

cluster_centers_hour <- uber_analysis_data_filtered %>%
  group_by(cluster, Hour) %>%
  summarise(
    mean_Lat = mean(Lat, na.rm = TRUE),
    mean_Lon = mean(Lon, na.rm = TRUE),
    count = n(),
    .groups = "drop"
  )

print(cluster_centers_hour)

## # A tibble: 41 × 5
##    cluster  Hour mean_Lat mean_Lon count
##      <dbl> <int>    <dbl>    <dbl> <int>
##  1       1     0     40.7    -74.0   127
##  2       1     1     40.7    -74.0    80
##  3       1     2     40.7    -74.0    51
##  4       1     3     40.7    -74.0    53
##  5       1     4     40.7    -74.0    58
##  6       1     5     40.7    -74.0    79
##  7       1     6     40.7    -74.0   151
##  8       1     7     40.7    -74.0   198
##  9       1     8     40.7    -74.0   210
## 10       1     9     40.7    -74.0   191
## # ℹ 31 more rows

I visualized these hourly changes using a line plot of mean latitude values across the 24-hour day for each cluster.

ggplot(cluster_centers_hour, aes(x = Hour, y = mean_Lat, color = factor(cluster), group = cluster)) +
  geom_line(size = 1) +
  geom_point(size = 2, alpha = 0.7) +
  labs(
    title = "Cluster Centers by Hour of Day",
    x = "Hour (0–23)",
    y = "Mean Latitude",
    color = "Cluster"
  ) +
  theme_classic()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Cluster 1 remains relatively stable across the 24-hour cycle, suggesting that this area experiences steady ride demand throughout the day. This results in cluster 1 belonging to to a central or high-traffic urban area where trip activity is continuous.

Cluster 2, however, shows noticeable fluctuations in its mean latitude, with activity peaking around the early morning and late evening hours. This variation may indicate that Cluster 2 represents a more residential area, where ride requests increase during typical commuting hours and decline during midday.

Similarly, I explored how cluster centers vary by date. This allows me to observe how the spatial concentration of Uber rides changes across different days.

cluster_centers_date <- uber_analysis_data_filtered %>%
  mutate(Date = as.Date(Date.Time)) %>%
  group_by(cluster, Date) %>%
  summarise(
    mean_Lat = mean(Lat, na.rm = TRUE),
    mean_Lon = mean(Lon, na.rm = TRUE),
    count = n(),
    .groups = "drop"
  )

print(head(cluster_centers_date))

## # A tibble: 6 × 5
##   cluster Date       mean_Lat mean_Lon count
##     <dbl> <date>        <dbl>    <dbl> <int>
## 1       1 2014-09-01     40.7    -73.9    92
## 2       1 2014-09-02     40.7    -74.0   148
## 3       1 2014-09-03     40.7    -74.0   151
## 4       1 2014-09-04     40.7    -74.0   184
## 5       1 2014-09-05     40.7    -74.0   179
## 6       1 2014-09-06     40.7    -74.0   197

I visualized the daily movement of cluster centers over time. The plot shows trends in average latitude for each cluster by date.

ggplot(cluster_centers_date, aes(x = Date, y = mean_Lat, color = factor(cluster), group = cluster)) +
  geom_line(size = 1) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Cluster Centers by Date",
    x = "Date (September 2014)",
    y = "Mean Latitude",
    color = "Cluster"
  ) +
  theme_classic()

Similar to the hourly pattern, Cluster 1 remains geographically stable, confirming that the cluster belongs to an urban region having consistent ride demand.

Cluster 2 shows several distinct peaks and troughs in mean latitude, noticle especially around mid and late September. These fluctuations indicate periodic shifts in ride concentration within that cluster, possibly due to specific events and/or weekends.

Forecasting

I proceeded to develop a forecasting model to predict future Uber ride demand. To achieve this, I selected Cluster 1 and aggregated the number of pickups per day for the month of September 2014. This daily time series was then used to build and compare two forecasting models: ARIMA (Auto-Regressive Integrated Moving Average) and ETS (Exponential Smoothing State Space Model).

These models are well-suited for short-term forecasting of time series data, allowing us to capture both trend and seasonal patterns in daily ride activity. After the model fitting, I generated a 7-day forecast for Cluster 1.

forecast_cluster_id <- 1

cluster_data <- uber_analysis_data_filtered %>%
  filter(cluster == forecast_cluster_id)

if (nrow(cluster_data) == 0) {
  stop(paste("Cluster", forecast_cluster_id, "has no data. Please choose a different cluster."))
}

cluster_daily_counts <- cluster_data %>%
  mutate(Date = floor_date(Date.Time, "day")) %>%
  group_by(Date) %>%
  summarise(Pickups = n(), .groups = 'drop')

all_days <- seq(as.Date("2014-09-01"), as.Date("2014-09-30"), by = "day")
all_days_df <- data.frame(Date = all_days)

cluster_ts_data <- all_days_df %>%
  left_join(cluster_daily_counts, by = "Date") %>%
  mutate(Pickups = ifelse(is.na(Pickups), 0, Pickups)) %>%
  select(Pickups)

cluster_ts <- ts(cluster_ts_data$Pickups, frequency = 7)
print(cluster_ts)

## Time Series:
## Start = c(1, 1) 
## End = c(5, 2) 
## Frequency = 7 
##  [1]  92 148 151 184 179 197 137 151 158 166 189 197 209 125 128 177 163 205 211
## [20] 198 131 141 149 144 184 188 189 134 137 156

ts_decomposition <- decompose(cluster_ts)
plot(ts_decomposition)

print("Fitting auto.arima model...")

## [1] "Fitting auto.arima model..."

fit_arima <- auto.arima(cluster_ts, seasonal = TRUE)
print(summary(fit_arima))

## Series: cluster_ts 
## ARIMA(0,1,0)(1,1,0)[7] 
## 
## Coefficients:
##          sar1
##       -0.6746
## s.e.   0.1372
## 
## sigma^2 = 214.3:  log likelihood = -91.87
## AIC=187.74   AICc=188.37   BIC=189.92
## 
## Training set error measures:
##                     ME    RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set -2.973228 12.2476 8.085387 -1.955667 4.993462 0.5313255 -0.2283172

print("Fitting ets model...")

## [1] "Fitting ets model..."

fit_ets <- ets(cluster_ts)
print(summary(fit_ets))

## ETS(A,N,A) 
## 
## Call:
## ets(y = cluster_ts)
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 165.3734 
##     s = -33.9937 33.0323 28.4239 25.2576 -9.6283 -7.6015
##            -35.4902
## 
##   sigma:  13.9065
## 
##      AIC     AICc      BIC 
## 269.2773 280.8562 283.2892 
## 
## Training set error measures:
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.001404806 11.63505 8.666329 -0.7208716 5.830495 0.5695016
##                   ACF1
## Training set 0.3060706

fore_arima <- forecast(fit_arima, h = 7)
print(fore_arima)

##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 5.285714       144.9286 126.16839 163.6889 116.23733 173.6199
## 5.428571       186.2778 159.74685 212.8088 145.70220 226.8534
## 5.571429       191.6270 159.13334 224.1207 141.93223 241.3218
## 5.714286       183.1827 145.66219 220.7031 125.80006 240.5652
## 5.857143       120.0875  78.13834 162.0366  55.93180 184.2432
## 6.000000       127.8097  81.85667 173.7627  57.53063 198.0887
## 6.142857       139.3891  89.75419 189.0240  63.47906 215.2991

plot(fore_arima, main = paste("ARIMA Forecast - Cluster", forecast_cluster_id))

fore_ets <- forecast(fit_ets, h = 7)
print(fore_ets)

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 5.285714       155.7452 137.9232 173.5671 128.4889 183.0015
## 5.428571       190.6309 172.8090 208.4529 163.3746 217.8873
## 5.571429       193.7973 175.9753 211.6192 166.5409 221.0536
## 5.714286       198.4056 180.5836 216.2276 171.1493 225.6619
## 5.857143       131.3798 113.5579 149.2018 104.1235 158.6362
## 6.000000       129.8832 112.0612 147.7051 102.6268 157.1395
## 6.142857       157.7718 139.9499 175.5938 130.5155 185.0282

plot(fore_ets, main = paste("ETS Forecast - Cluster", forecast_cluster_id))

The historical data (black line) in both plots reveals a clear and strong weekly pattern in Uber ride demand. This is consistent with typical urban ride behavior, where weekends and weekdays exhibit different usage intensities.

Both the ARIMA and ETS models forecast a continuation of this strong weekly cycle, predicting an initial peak followed by a trough over the next period. However, a direct comparison of the two shows the ETS model provides visibly tighter (narrower) confidence intervals (the grey shaded area) than the ARIMA model. This indicates that, for this specific dataset, the ETS model is more confident in its predictions and likely provides a better fit.