Team26(D,M)

Author

Deepthi, Meghana

Step 1: Load the libraries

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(GGally)
Warning: package 'GGally' was built under R version 4.5.3
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
# Load the pickup records and derive calendar features from the timestamp.
# Column names are trimmed and lower-cased; the column that ends up named
# `date.time` is renamed to `datetime`.
data <- read.csv("uber-raw-data-apr14.csv", stringsAsFactors = FALSE)
colnames(data) <- tolower(trimws(colnames(data)))
colnames(data)[colnames(data) == "date.time"] <- "datetime"

# Fail early with a clear message instead of a confusing downstream error
# when the timestamp column is absent or named differently.
if (!"datetime" %in% colnames(data)) {
  stop(
    "Expected a 'date.time' column in uber-raw-data-apr14.csv",
    call. = FALSE
  )
}

# Parse with an explicit time zone. Without `tz`, as.POSIXct() uses the
# session's local zone, so a wall-clock time that falls inside that zone's
# daylight-saving gap would be parsed incorrectly on some machines.
# NOTE(review): assumes the timestamps are New York local time (the script
# assigns NYC borough labels further down) -- confirm against the data source.
raw_datetime <- data$datetime
data$datetime <- as.POSIXct(
  raw_datetime,
  format = "%m/%d/%Y %H:%M:%S",
  tz = "America/New_York"
)

# Surface rows whose timestamp did not match the expected format; they would
# otherwise turn into NA silently and distort the hour/day/weekday features.
n_unparsed <- sum(is.na(data$datetime) & !is.na(raw_datetime))
if (n_unparsed > 0) {
  warning(
    n_unparsed, " timestamp(s) could not be parsed and were set to NA",
    call. = FALSE
  )
}

# Calendar components used by the summaries and plots below.
data$hour <- hour(data$datetime)
data$day <- day(data$datetime)
data$weekday <- wday(data$datetime, label = TRUE)
# Peak hours: hours 7-9 and 17-19 are labelled "Peak", every other hour
# "Non-Peak". `%in%` yields FALSE for NA, so a missing hour would silently be
# counted as "Non-Peak"; keep such rows NA instead.
peak_hours <- c(7, 8, 9, 17, 18, 19)
data$peak_hour <- ifelse(
  is.na(data$hour),
  NA_character_,
  ifelse(data$hour %in% peak_hours, "Peak", "Non-Peak")
)

# Approximate borough label, decided from the pickup latitude alone
# (longitude is not consulted). Thresholds are checked from north to south;
# a missing latitude stays NA.
data <- data %>%
  mutate(
    borough = case_when(
      is.na(lat) ~ NA_character_,
      lat > 40.75 ~ "Manhattan",
      lat > 40.65 ~ "Brooklyn",
      lat > 40.6 ~ "Queens",
      TRUE ~ "Other"
    )
  )
# One row per `base`: number of pickups plus the mean hour, latitude and
# longitude of those pickups (missing values are ignored in the means).
pickup_data <- summarise(
  group_by(data, base),
  total_pickups = n(),
  avg_hour = mean(hour, na.rm = TRUE),
  avg_lat = mean(lat, na.rm = TRUE),
  avg_lon = mean(lon, na.rm = TRUE)
)
# Parallel-coordinates view of the per-base summary: the first column
# identifies (and colours) each line, every remaining column becomes an axis,
# and each axis is rescaled with the "uniminmax" option.
ggparcoord(
  pickup_data,
  columns = seq(2, ncol(pickup_data)),
  groupColumn = 1,
  scale = "uniminmax",
  alphaLines = 0.6,
  showPoints = TRUE
) +
  ggtitle("Parallel Coordinates Plot of Uber Pickups") +
  xlab("Indicators") +
  ylab("Scaled Values") +
  theme_minimal()

# Distribution of pickups over the hour column, one bin per unit of hour.
ggplot(data = data, mapping = aes(x = hour)) +
  geom_histogram(fill = "blue", binwidth = 1) +
  ggtitle("Uber Pickups by Hour") +
  xlab("Hour") +
  ylab("Count")

# Number of pickups for each value of the weekday column.
ggplot(data = data, mapping = aes(x = weekday)) +
  geom_bar(fill = "green") +
  ggtitle("Pickups by Day of Week")

# Number of pickups for each approximate borough label.
ggplot(data = data, mapping = aes(x = borough)) +
  geom_bar(fill = "purple") +
  ggtitle("Pickups by Borough")