Loading and Preprocessing the Data
# Read the raw activity records from the CSV file in the working directory.
activity <- read.csv(file = "activity.csv")
# Convert the date column to class Date so date-aware functions can use it.
activity[["date"]] <- as.Date(activity[["date"]])
What is the average daily activity pattern?
# Mean of steps within each interval. The formula method of aggregate()
# omits rows with missing values by default; na.rm = TRUE is forwarded to
# mean() as well.
avg_steps <- aggregate(
  steps ~ interval,
  data = activity,
  FUN = mean,
  na.rm = TRUE
)

# Line chart of the per-interval mean against the interval identifier.
plot(
  x = avg_steps[["interval"]],
  y = avg_steps[["steps"]],
  type = "l",
  main = "Average Daily Activity Pattern",
  xlab = "5-minute Interval",
  ylab = "Average Steps"
)

# Interval whose mean is largest; which.max() picks the first one on ties.
max_interval <- with(avg_steps, interval[which.max(steps)])
print(paste("Interval with maximum steps:", max_interval, sep = " "))
## [1] "Interval with maximum steps: 835"
Are there differences in activity patterns between weekdays and weekends?
# Create weekday/weekend factor.
# weekdays() returns day names in the current locale, so comparing against
# the English names "Saturday"/"Sunday" silently labels every row "weekday"
# on a non-English system. The numeric day-of-week from POSIXlt
# (0 = Sunday, ..., 6 = Saturday) does not depend on the locale.
# NOTE(review): assumes activity_imputed$date is a Date (or otherwise
# convertible by as.POSIXlt) -- confirm against the imputation step.
activity_imputed$day_type <- factor(
  ifelse(
    as.POSIXlt(activity_imputed$date)$wday %in% c(0L, 6L),
    "weekend",
    "weekday"
  )
)

# Calculate average steps for each interval and day type
avg_steps_by_day <- aggregate(
  steps ~ interval + day_type,
  data = activity_imputed,
  FUN = mean
)

# Create panel plot: one line chart per day type, stacked vertically so the
# two patterns share the same x axis.
library(ggplot2)
ggplot(avg_steps_by_day, aes(x = interval, y = steps, color = day_type)) +
  geom_line() +
  facet_wrap(~ day_type, ncol = 1, nrow = 2) +
  labs(
    title = "Average Steps by Interval and Day Type",
    x = "5-minute Interval",
    y = "Average Steps"
  )
