Loading and preprocessing the data

# Read the raw activity measurements from the working directory; the first
# line of the CSV supplies the column names.
data <- read.csv(file = "activity.csv", header = TRUE)

What is the mean total number of steps taken per day?

# Total number of steps recorded on each date. A date whose readings include
# an NA sums to NA; the summaries below skip those dates via na.rm = TRUE.
# vapply() is used instead of sapply() so the result is always a numeric
# vector, whatever the input looks like.
stepsPerDay <- vapply(
  split(data$steps, data$date),
  sum,
  FUN.VALUE = numeric(1)
)

# Distribution of the daily totals.
hist(stepsPerDay,
     col = "green",
     main = "Number of steps per day",
     xlab = "Number of steps",
     ylab = "Days",
     breaks = 20,
     ylim = c(0, 11),
     xlim = c(0, 23000))

# Mean and median of the daily totals, ignoring dates without a total.
mean(stepsPerDay, na.rm = TRUE)
## [1] 10766.19
median(stepsPerDay, na.rm = TRUE)
## [1] 10765

What is the average daily activity pattern?

# Mean number of steps for each 5-minute interval across all dates, ignoring
# missing readings. The result is a numeric vector named by interval
# identifier; vapply() guarantees that shape where sapply() would not.
avgStepsPerInterval <- vapply(
  split(data$steps, data$interval),
  mean,
  FUN.VALUE = numeric(1),
  na.rm = TRUE
)

# Line plot of the interval means; the x axis is the position of the interval
# within the vector, not the interval identifier itself.
plot(avgStepsPerInterval,
     col = "red",
     type = "l",
     main = "Number of steps per 5min Interval",
     xlab = "5min interval",
     ylab = "Number of steps")

# Identifier of the interval with the highest mean. which.max() avoids
# comparing doubles with == and returns the first interval if several tie.
names(which.max(avgStepsPerInterval))
## [1] "835"

Imputing missing values

# Number of observations with a missing step count.
sum(is.na(data$steps))
## [1] 2304

# Work on a copy so the raw data stay untouched, then replace every missing
# step count with the mean for the same 5-minute interval. The lookup is done
# by name (interval identifier as a string) in avgStepsPerInterval, and the
# whole replacement is a single vectorised assignment, which also behaves
# correctly when the data frame has no rows.
dataFilled <- data
missingSteps <- is.na(dataFilled$steps)
intervalKeys <- as.character(dataFilled$interval[missingSteps])
dataFilled$steps[missingSteps] <- unname(avgStepsPerInterval[intervalKeys])

# Total number of steps per date, computed on the data set whose missing step
# counts have been filled in. vapply() is used instead of sapply() so the
# result is always a numeric vector.
totalStepsPerDay <- vapply(
  split(dataFilled$steps, dataFilled$date),
  sum,
  FUN.VALUE = numeric(1)
)

# Distribution of the daily totals after imputation.
hist(totalStepsPerDay,
     col = "blue",
     main = "Number of steps per day (NAs filled)",
     xlab = "Number of steps",
     ylab = "Days",
     breaks = 20,
     ylim = c(0, 12),
     xlim = c(0, 23000))

# Mean and median of the daily totals after imputation.
mean(totalStepsPerDay, na.rm = TRUE)
## [1] 10766.19
median(totalStepsPerDay, na.rm = TRUE)
## [1] 10766.19

Are there differences in activity patterns between weekdays and weekends?

# Tag every observation with its day name and with a weekday/weekend factor.
# weekdays() returns names in the session's locale, so the weekend test does
# not match on English names; it uses the numeric day of the week from
# POSIXlt instead (0 = Sunday, 6 = Saturday), which is locale-independent.
observationDates <- as.Date(dataFilled$date, format = "%Y-%m-%d")
dataFilled$weekday <- weekdays(observationDates)
dayOfWeek <- as.POSIXlt(observationDates)$wday
dataFilled$dayType <- factor(
  ifelse(dayOfWeek == 0L | dayOfWeek == 6L, "weekend", "weekday"),
  levels = c("weekday", "weekend")
)

# Split the imputed data by day type, then average the steps within each
# 5-minute interval for each subset. vapply() fixes the result to a numeric
# vector named by interval identifier.
dataFilledWeekdays <- dataFilled[dataFilled$dayType == "weekday", ]
dataFilledWeekend <- dataFilled[dataFilled$dayType == "weekend", ]

avStepsPerIntWeekdays <- vapply(
  split(dataFilledWeekdays$steps, dataFilledWeekdays$interval),
  mean,
  FUN.VALUE = numeric(1)
)
avStepsPerIntWeekend <- vapply(
  split(dataFilledWeekend$steps, dataFilledWeekend$interval),
  mean,
  FUN.VALUE = numeric(1)
)

# Draw the weekend and weekday interval averages as two stacked line plots
# (weekend on top). Only the data, line colour, x-axis label and title differ
# between the panels, so both are drawn from one specification list.
par(mfrow = c(2, 1), mar = c(4, 5, 2, 2))

panelSpecs <- list(
  list(values = avStepsPerIntWeekend,
       colour = "blue",
       xLabel = "",
       title = "weekend"),
  list(values = avStepsPerIntWeekdays,
       colour = "red",
       xLabel = "5min interval",
       title = "weekday")
)

for (spec in panelSpecs) {
  plot(spec[["values"]],
       type = "l",
       col = spec[["colour"]],
       xlab = spec[["xLabel"]],
       ylab = "Avg. steps",
       main = spec[["title"]])
}