This assignment makes use of data from a personal activity monitoring device. This device collects data at 5-minute intervals throughout the day. The data consist of two months of measurements from an anonymous individual, collected during October and November 2012, and include the number of steps taken in each 5-minute interval of every day.
The data for this assignment can be downloaded from the course web site:
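The variables included in this dataset are: steps (the number of steps taken in a 5-minute interval; missing values are coded as NA), date (the date on which the measurement was taken), and interval (the identifier of the 5-minute interval in which the measurement was taken).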
# Extract the archive into the working directory and read the CSV it contains.
unzip(zipfile = "activity.zip")
data <- read.csv(file = "activity.csv")
# Convert the date column to Date class, then show the resulting structure.
data[["date"]] <- as.Date(data[["date"]])
str(data)
## 'data.frame': 17568 obs. of 3 variables:
## $ steps : int NA NA NA NA NA NA NA NA NA NA ...
## $ date : Date, format: "2012-10-01" "2012-10-01" ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
library(dplyr)
# Keep only the rows where a step count was actually recorded.
clean_data <- filter(data, !is.na(steps))
# Add up the recorded steps within each date.
daily_steps <- summarise(group_by(clean_data, date), total = sum(steps))
# Histogram of the per-day totals.
hist(
  daily_steps[["total"]],
  breaks = 20,
  col = "lightblue",
  main = "Total Steps Per Day",
  xlab = "Steps"
)
# Mean and median of the per-day totals; each value is printed below.
mean_steps <- mean(daily_steps[["total"]])
median_steps <- median(daily_steps[["total"]])
mean_steps
## [1] 10766.19
median_steps
## [1] 10765
# Mean step count for each 5-minute interval, computed on the NA-free rows.
interval_avg <- summarise(
  group_by(clean_data, interval),
  avg_steps = mean(steps)
)
# Line plot of the per-interval means against the interval identifier.
plot(
  x = interval_avg[["interval"]],
  y = interval_avg[["avg_steps"]],
  type = "l",
  main = "Average Daily Activity Pattern",
  xlab = "5-Minute Interval",
  ylab = "Average Number of Steps"
)
# Row with the highest mean; single-bracket indexing keeps a 1 x 1 tibble.
max_interval <- interval_avg[
  which.max(interval_avg[["avg_steps"]]),
  "interval"
]
max_interval
## # A tibble: 1 × 1
## interval
## <int>
## 1 835
# Count the rows whose step value is missing.
sum(is.na(data[["steps"]]))
## [1] 2304
# Attach the per-interval mean to every row, use it wherever the step count
# is missing, and then keep only the steps, date and interval columns.
imputed_data <- left_join(data, interval_avg, by = "interval")
imputed_data <- mutate(
  imputed_data,
  steps = ifelse(is.na(steps), avg_steps, steps)
)
imputed_data <- select(imputed_data, steps, date, interval)
# Per-day totals on the filled-in data.
daily_imputed <- summarise(group_by(imputed_data, date), total = sum(steps))
hist(
  daily_imputed[["total"]],
  breaks = 20,
  col = "lightgreen",
  main = "Imputed: Total Steps Per Day",
  xlab = "Steps"
)
# Mean and median of the per-day totals after imputation.
mean_imputed <- mean(daily_imputed[["total"]])
median_imputed <- median(daily_imputed[["total"]])
mean_imputed
## [1] 10766.19
median_imputed
## [1] 10766.19
# Label every observation with its day name and with "weekday"/"weekend".
#
# The weekend test uses the numeric day of week from as.POSIXlt()
# (0 = Sunday, 6 = Saturday) instead of matching the strings returned by
# weekdays(): those names are translated according to the session locale, so
# comparing them with "Saturday"/"Sunday" would classify every row as a
# weekday outside an English locale.
#
# The result is intentionally left ungrouped; the aggregation that follows
# applies its own group_by(), so a persistent grouping on this shared data
# frame is not needed.
imputed_data <- imputed_data %>%
  mutate(
    weekday = weekdays(date),
    day_type = ifelse(
      as.POSIXlt(date)$wday %in% c(0L, 6L),
      "weekend",
      "weekday"
    )
  )
# Mean step count for every interval / day-type combination.
interval_by_day <- summarise(
  group_by(imputed_data, interval, day_type),
  avg_steps = mean(steps)
)
## `summarise()` has grouped output by 'interval'. You can override using the
## `.groups` argument.
library(lattice)
# Lattice line plot of mean steps against interval, one panel per day type,
# laid out as a single column of two rows.
xyplot(
  avg_steps ~ interval | day_type,
  data = interval_by_day,
  type = "l",
  layout = c(1, 2),
  xlab = "Interval",
  ylab = "Number of Steps"
)