Loading and preprocessing the data
# Load required libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the dataset
activity <- read.csv("activity.csv", stringsAsFactors = FALSE)
# Convert date column to Date type
activity$date <- as.Date(activity$date, format="%Y-%m-%d")
What is mean total number of steps taken per day?
# Remove NA values and summarize total steps per day
total_steps_per_day <- activity %>%
filter(!is.na(steps)) %>%
group_by(date) %>%
summarize(total_steps = sum(steps))

# Calculate mean and median
mean_steps <- mean(total_steps_per_day$total_steps)
median_steps <- median(total_steps_per_day$total_steps)
# Print results
cat("Mean Total Steps Per Day:", mean_steps, "\n")
## Mean Total Steps Per Day: 10766.19
cat("Median Total Steps Per Day:", median_steps, "\n")
## Median Total Steps Per Day: 10765
What is the average daily activity pattern?
avg_steps_interval <- activity %>%
group_by(interval) %>%
summarize(avg_steps = mean(steps, na.rm = TRUE))
plot(avg_steps_interval$interval, avg_steps_interval$avg_steps, type = "l",
xlab = "5-minute Interval", ylab = "Average Steps",
main = "Average Daily Activity Pattern")

Interval with Maximum Steps
max_interval <- avg_steps_interval[which.max(avg_steps_interval$avg_steps), ]
max_interval
## # A tibble: 1 × 2
## interval avg_steps
## <int> <dbl>
## 1 835 206.
Imputing missing values
total_missing <- sum(is.na(activity$steps))
total_missing
## [1] 2304
Filling Missing Values
filled_activity <- activity
avg_steps_per_interval <- activity %>%
group_by(interval) %>%
summarize(mean_steps = mean(steps, na.rm = TRUE))
for (i in 1:nrow(filled_activity)) {
if (is.na(filled_activity$steps[i])) {
filled_activity$steps[i] <- avg_steps_per_interval$mean_steps[avg_steps_per_interval$interval == filled_activity$interval[i]]
}
}
Histogram of Total Steps Per Day (After Imputation)
total_steps_per_day_filled <- filled_activity %>%
group_by(date) %>%
summarize(total_steps = sum(steps))
hist(total_steps_per_day_filled$total_steps,
main = "Total Steps Per Day (Imputed Data)",
xlab = "Total Steps", col = "blue",
breaks = 20)

Are there differences in activity patterns between weekdays and
weekends?
filled_activity$day_type <- ifelse(weekdays(filled_activity$date) %in% c("Saturday", "Sunday"), "weekend", "weekday")
Panel Plot for Weekday vs. Weekend
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
avg_steps_by_day_type <- filled_activity %>%
group_by(interval, day_type) %>%
summarize(avg_steps = mean(steps))
## `summarise()` has grouped output by 'interval'. You can override using the
## `.groups` argument.
ggplot(avg_steps_by_day_type, aes(x = interval, y = avg_steps, color = day_type)) +
geom_line() +
facet_wrap(~day_type, ncol = 1) +
labs(title = "Activity Pattern: Weekday vs. Weekend", x = "5-minute Interval", y = "Average Steps") +
theme_minimal()
