# Load required libraries
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.4.3
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(DT)
## Warning: package 'DT' was built under R version 4.4.3
# Step 1: Import the dataset
# The CSV location can be overridden through the UBER_NYC_CSV environment
# variable so the script is not tied to a single machine. When the variable
# is unset or empty, the original hard-coded path is used.
default_uber_csv <- "C:/Users/22bt0/OneDrive/Desktop/uber_nyc_enriched.csv"
uber_csv <- Sys.getenv("UBER_NYC_CSV", unset = default_uber_csv)
if (!nzchar(uber_csv)) {
  uber_csv <- default_uber_csv
}
# Fail early with a readable message instead of read.csv's connection error.
if (!file.exists(uber_csv)) {
  stop("Uber dataset not found: ", uber_csv, call. = FALSE)
}
uber <- read.csv(uber_csv)

# Step 2: Preprocess & clean data
# The timestamp is parsed BEFORE the NA filter: read.csv leaves blank or
# malformed pickup_dt strings as non-NA text, so filtering first would let
# them through and ymd_hms() would then turn them into NA, producing NA
# date/hour/weekday groups in every later summary. ymd_hms() still warns
# about values it cannot parse, so bad input stays visible.
uber_clean <- uber %>%
  mutate(pickup_dt = ymd_hms(pickup_dt)) %>%
  filter(!is.na(pickup_dt), !is.na(pickups)) %>%
  mutate(
    # Calendar components derived from the parsed timestamp.
    date = as.Date(pickup_dt),
    year = year(pickup_dt),
    month = month(pickup_dt, label = TRUE, abbr = TRUE),
    day = day(pickup_dt),
    weekday = wday(pickup_dt, label = TRUE, abbr = FALSE),
    hour = hour(pickup_dt)
  )

# Interactive preview of the first ten rows of the cleaned data
uber_clean %>%
  head(10) %>%
  datatable(caption = "Preview of Cleaned Uber NYC Data")
# Step 3: Daily trend
# Pickups summed per calendar date and drawn as a line over time.
daily_pickups <- uber_clean %>%
  group_by(date) %>%
  summarise(total_pickups = sum(pickups), .groups = "drop")

ggplot(data = daily_pickups, mapping = aes(date, total_pickups)) +
  geom_line(colour = "steelblue") +
  labs(
    title = "Daily Uber Pickups in NYC",
    x = "Date",
    y = "Pickups"
  ) +
  theme_minimal()

# Step 4: Monthly trend
# Pickups summed per month label; grouping is on month alone, so any
# repeated month across years is pooled into one bar.
monthly_pickups <- uber_clean %>%
  group_by(month) %>%
  summarise(total_pickups = sum(pickups), .groups = "drop")

ggplot(data = monthly_pickups, mapping = aes(month, total_pickups)) +
  geom_col(aes(fill = month)) +
  labs(
    title = "Monthly Uber Pickups in NYC",
    x = "Month",
    y = "Pickups"
  ) +
  theme_minimal()

# Step 5: Yearly trend
# Pickups summed per year; year is converted to a factor in the plot so each
# year gets a discrete bar and its own fill colour.
yearly_pickups <- uber_clean %>%
  group_by(year) %>%
  summarise(total_pickups = sum(pickups), .groups = "drop")

ggplot(
  data = yearly_pickups,
  mapping = aes(factor(year), total_pickups, fill = factor(year))
) +
  geom_col() +
  labs(
    title = "Yearly Uber Pickups in NYC",
    x = "Year",
    y = "Pickups"
  ) +
  theme_minimal()

# Step 6: Hourly peak trend
# Pickups summed per hour of day (all dates pooled) and drawn as a line.
hourly_pickups <- uber_clean %>%
  group_by(hour) %>%
  summarise(total_pickups = sum(pickups), .groups = "drop")

ggplot(data = hourly_pickups, mapping = aes(hour, total_pickups)) +
  geom_line(colour = "darkgreen") +
  labs(
    title = "Hourly Uber Pickups in NYC",
    x = "Hour",
    y = "Total Pickups"
  ) +
  theme_minimal()

# Step 7: Weekly trend
# Pickups summed per weekday name. reorder() arranges the bars by ascending
# total rather than in calendar order.
weekday_pickups <- uber_clean %>%
  group_by(weekday) %>%
  summarise(total_pickups = sum(pickups), .groups = "drop")

ggplot(
  data = weekday_pickups,
  mapping = aes(
    reorder(weekday, total_pickups),
    total_pickups,
    fill = weekday
  )
) +
  geom_col() +
  labs(
    title = "Uber Pickups by Day of the Week",
    x = "Weekday",
    y = "Total Pickups"
  ) +
  theme_minimal()

# Step 8: Holiday analysis
# Keeps only rows whose hday flag equals "Y", then sums pickups per date.
holiday_pickups <- uber_clean %>%
  filter(hday == "Y") %>%
  group_by(date) %>%
  summarise(holiday_pickups = sum(pickups), .groups = "drop")

ggplot(data = holiday_pickups, mapping = aes(date, holiday_pickups)) +
  geom_col(fill = "red") +
  labs(
    title = "Uber Pickups on Holidays",
    x = "Holiday Date",
    y = "Total Pickups"
  ) +
  theme_minimal() +
  # Tilt the date labels so neighbouring ticks do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Step 9: Weather correlation (example: temperature vs pickups)
# Mean pickups at each distinct temp value, shown as points with a
# straight-line (lm) fit and no confidence band.
weather_temp <- uber_clean %>%
  group_by(temp) %>%
  summarise(avg_pickups = mean(pickups), .groups = "drop")

ggplot(data = weather_temp, mapping = aes(temp, avg_pickups)) +
  geom_point(colour = "orange", alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  labs(
    title = "Temperature vs Average Uber Pickups",
    x = "Temperature (°F)",
    y = "Avg Pickups"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'