# Read the data
bike_sharing_data <- read.csv("C:/Statistics for Data Science/Week 2/bike+sharing+dataset/hour.csv")
# Convert date column to proper Date format
bike_ts <- bike_sharing_data |>
mutate(date = as.Date(dteday)) |>
group_by(date) |>
summarise(total_rides = sum(cnt)) |>
as_tsibble(index = date)
# Show the first few rows
print("First few rows of our tsibble:")
## [1] "First few rows of our tsibble:"
print(bike_ts)
## # A tsibble: 731 x 2 [1D]
## date total_rides
## <date> <int>
## 1 2011-01-01 985
## 2 2011-01-02 801
## 3 2011-01-03 1349
## 4 2011-01-04 1562
## 5 2011-01-05 1600
## 6 2011-01-06 1606
## 7 2011-01-07 1510
## 8 2011-01-08 959
## 9 2011-01-09 822
## 10 2011-01-10 1321
## # ℹ 721 more rows
Insight: Successfully converted date string to R Date format and created a tsibble object for time series analysis.
# Plot different time windows
# Full period
p1 <- bike_ts |>
ggplot(aes(x = date, y = total_rides)) +
geom_line() +
labs(title = "Daily Bike Rentals - Full Period",
x = "Date",
y = "Total Rentals") +
theme_minimal()
# Create weekly and monthly views
weekly_ts <- bike_ts |>
index_by(week = yearweek(date)) |>
summarise(total_rides = sum(total_rides))
monthly_ts <- bike_ts |>
index_by(month = yearmonth(date)) |>
summarise(total_rides = sum(total_rides))
# Plot weekly view
p2 <- weekly_ts |>
ggplot(aes(x = week, y = total_rides)) +
geom_line() +
labs(title = "Weekly Bike Rentals",
x = "Week",
y = "Total Rentals") +
theme_minimal()
# Plot monthly view
p3 <- monthly_ts |>
ggplot(aes(x = month, y = total_rides)) +
geom_line() +
labs(title = "Monthly Bike Rentals",
x = "Month",
y = "Total Rentals") +
theme_minimal()
print(p1)
print(p2)
print(p3)
What stands out immediately: - Clear weekly cycling pattern in usage - Strong seasonal variation with summer peaks - Overall upward trend in ridership - Weekend drops in usage visible in daily data
# Simple linear regression for overall trend
trend_model <- lm(total_rides ~ as.numeric(date), data = bike_ts)
# Print trend model results
summary(trend_model)
##
## Call:
## lm(formula = total_rides ~ as.numeric(date), data = bike_ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6224.5 -986.3 169.5 1216.1 3384.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.399e+04 4.053e+03 -20.72 <2e-16 ***
## as.numeric(date) 5.769e+00 2.642e-01 21.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1507 on 729 degrees of freedom
## Multiple R-squared: 0.3954, Adjusted R-squared: 0.3946
## F-statistic: 476.8 on 1 and 729 DF, p-value: < 2.2e-16
# Add seasonal analysis
bike_ts_seasonal <- bike_ts |>
mutate(
month = month(date),
season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)
)
# Seasonal trends
seasonal_models <- bike_ts_seasonal |>
group_by(season) |>
group_modify(~ {
model <- lm(total_rides ~ as.numeric(date), data = .)
data.frame(
slope = coef(model)[2],
r_squared = summary(model)$r.squared
)
})
# Print seasonal results
print("Seasonal Trends:")
## [1] "Seasonal Trends:"
print(seasonal_models)
## # A tibble: 4 × 3
## # Groups: season [4]
## season slope r_squared
## <chr> <dbl> <dbl>
## 1 Fall 5.88 0.357
## 2 Spring 7.55 0.603
## 3 Summer 5.77 0.639
## 4 Winter 3.90 0.445
Trend Analysis Insights: - Strong overall upward trend in ridership - Seasonal variations in trend strength - R² values indicate model fit quality - Different growth rates across seasons
# Calculate moving averages for smoothing
bike_ts_smooth <- bike_ts |>
mutate(
MA7 = slider::slide_dbl(total_rides, mean, .before = 3, .after = 3, .complete = TRUE),
MA30 = slider::slide_dbl(total_rides, mean, .before = 15, .after = 14, .complete = TRUE)
)
# Plot with smoothing
ggplot(bike_ts_smooth, aes(x = date)) +
geom_line(aes(y = total_rides), alpha = 0.3) +
geom_line(aes(y = MA7, color = "Weekly MA")) +
geom_line(aes(y = MA30, color = "Monthly MA")) +
labs(title = "Bike Rentals with Moving Averages",
x = "Date",
y = "Number of Rentals",
color = "Moving Average") +
theme_minimal()
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_line()`).
Smoothing Insights: - Clear weekly seasonal pattern - Strong trend component - Varying seasonal amplitude - Some irregular patterns visible
# Generate ACF and PACF plots
bike_ts |>
gg_tsdisplay(total_rides,
plot_type = "partial",
lag_max = 30) +
labs(title = "ACF and PACF of Daily Rentals")
Seasonality Insights from ACF/PACF: - Strong weekly seasonality (lag 7) - Significant autocorrelation pattern - Clear seasonal dependencies - Useful for forecasting model selection