Loading and preprocessing the data

# NOTE(review): setwd() with an absolute, user-specific path ties this script
# to a single machine; consider reading the file via a project-relative path.
setwd("/Users/adrianromano/Downloads")
# Read the raw activity data from the working directory and inspect its layout.
activity <- read.csv(file = "activity.csv")
str(object = activity)
## 'data.frame':    17568 obs. of  3 variables:
##  $ steps   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ date    : Factor w/ 61 levels "2012-10-01","2012-10-02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ interval: int  0 5 10 15 20 25 30 35 40 45 ...
# Convert the date column to class Date so it can be used in date arithmetic.
activity[["date"]] <- as.Date(activity[["date"]])

What is the mean total number of steps taken per day?

  1. Calculate the total number of steps taken per day
library(plyr)
# Sum the steps recorded on each date. The formula interface of aggregate()
# drops rows with NA steps by default (na.action = na.omit), so days that
# contain only missing values do not appear in the result.
totalSteps <- setNames(
  aggregate(steps ~ date, data = activity, FUN = sum),
  c("Date", "Steps")
)
head(totalSteps)
##         Date Steps
## 1 2012-10-02   126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
  1. Histogram of the total number of steps taken per day
# Histogram showing the distribution of the daily step totals.
hist(
  totalSteps$Steps,
  col = "red",
  xlab = "Number of Steps",
  main = "Total Number of Steps"
)

  1. Calculate the mean and median of the total number of steps taken per day
# Mean of the daily step totals (NA totals, if any, are ignored).
meanSteps <- mean(totalSteps[["Steps"]], na.rm = TRUE)
print(meanSteps)
## [1] 10766.19
# Median of the daily step totals (NA totals, if any, are ignored).
medianSteps <- median(totalSteps[["Steps"]], na.rm = TRUE)
print(medianSteps)
## [1] 10765

What is the average daily activity pattern?

  1. Create a time series plot of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
# Average the steps within each 5-minute interval across all days.
# Rows with missing steps are excluded from the averages.
averageSteps <- aggregate(
  steps ~ interval,
  data = activity,
  FUN = mean,
  na.rm = TRUE
)
names(averageSteps) <- c("Interval", "Steps")
head(averageSteps)
##   Interval     Steps
## 1        0 1.7169811
## 2        5 0.3396226
## 3       10 0.1320755
## 4       15 0.1509434
## 5       20 0.0754717
## 6       25 2.0943396
# Line plot of the average number of steps (y) against the interval (x).
with(
  averageSteps,
  plot(
    Interval, Steps,
    type = "l",
    col = "blue",
    main = "Average Number of Steps per Interval",
    xlab = "Interval",
    ylab = "Average Number of Steps"
  )
)

  1. Find the 5-minute interval that, on average across all days, contains the maximum number of steps
# Keep the row of the interval with the highest average step count
# (which.max returns the first such row if there are ties).
maxSteps <- averageSteps[which.max(averageSteps[["Steps"]]), , drop = FALSE]
print(maxSteps)
##     Interval    Steps
## 104      835 206.1698

Imputing missing values

  1. Look at the number of missing values in the data
# Count every NA cell across all columns of the data frame.
length(which(is.na(activity)))
## [1] 2304
  1. Replace NA values with the overall mean of the per-interval average steps (a single value used for every missing entry)
# Overall mean of the per-interval averages; this single value is used as the
# fill-in for every missing step count.
meanPerInterval <- mean(averageSteps[["Steps"]], na.rm = TRUE)
# Work on a copy so the original data (with its NAs) stays available.
activity1 <- activity
activity1$steps <- replace(
  activity1$steps,
  is.na(activity1$steps),
  meanPerInterval
)
  1. Histogram of the total number of steps taken each day after missing values were imputed
# Daily step totals computed from the imputed data set (activity1).
totalSteps1 <- aggregate(activity1$steps ~ activity1$date, FUN = sum)
colnames(totalSteps1) <- c("Date", "Steps")
# Plot the imputed totals (totalSteps1). The previous call plotted totalSteps,
# i.e. the totals from before imputation, so the histogram did not reflect the
# filled-in values this section is about.
hist(totalSteps1$Steps, main = "Total Number of Steps", xlab = "Number of Steps", col = "green")

  1. Calculate the mean and median of the total number of steps taken each day after missing values were imputed
# Mean of the daily totals after imputation.
meanSteps1 <- mean(totalSteps1[["Steps"]])
print(meanSteps1)
## [1] 10766.19
# Median of the daily totals after imputation.
medianSteps1 <- median(totalSteps1[["Steps"]])
print(medianSteps1)
## [1] 10766.19

Are there differences in activity patterns between weekdays and weekends?

  1. Create a new factor variable indicating whether a given date is a weekday or weekend day
# Make sure the date column is of class Date before deriving the day type.
activity1$date <- as.Date(activity1$date)
# Classify each date by its numeric day of the week instead of its name:
# weekdays() returns locale-dependent names, so comparing against the English
# "Saturday"/"Sunday" labels every day as "Weekday" in a non-English locale.
# POSIXlt's wday is 0-6 with 0 = Sunday and 6 = Saturday in any locale.
weekdayIndex <- as.POSIXlt(activity1$date)$wday
# Store the result as a factor with a fixed level order.
activity1$dayCategory <- factor(
  ifelse(weekdayIndex == 0L | weekdayIndex == 6L, "Weekend", "Weekday"),
  levels = c("Weekday", "Weekend")
)
# Average steps per interval, separately for weekdays and weekend days.
averageDayCategory <- aggregate(activity1$steps ~ activity1$dayCategory + activity1$interval, FUN = mean)
colnames(averageDayCategory) <- c("DayType", "Interval", "Steps")
head(averageDayCategory)
##   DayType Interval    Steps
## 1 Weekday        0 7.006569
## 2 Weekend        0 4.672825
## 3 Weekday        5 5.384347
## 4 Weekend        5 4.672825
## 5 Weekday       10 5.139902
## 6 Weekend       10 4.672825
  1. Make a panel plot containing a time series plot of the 5-minute interval and the average number of steps taken, averaged across all weekday days or weekend days
library(ggplot2)
# Base plot: average steps by interval, coloured by day type.
p <- ggplot(
  averageDayCategory,
  aes(x = Interval, y = Steps, colour = DayType)
)
# Draw one line panel per day type, stacked vertically (1 column, 2 rows).
p +
  geom_line() +
  labs(
    x = "Intervals",
    y = "Average Number of Steps",
    title = "Activity Patterns"
  ) +
  facet_wrap(~DayType, ncol = 1, nrow = 2)