This assignment makes use of data from a personal activity monitoring device. This device collects data at 5-minute intervals throughout the day. The data consist of two months of observations from an anonymous individual, collected during October and November 2012, and include the number of steps taken in each 5-minute interval of each day.

1. Loading and preprocessing the data

Check whether the data file exists in the working folder; if it does not, download it from the provided link and unzip it. Then load the data into a data frame.

# Download and extract the archive only when the CSV is not already present
# in the working directory.
if (!file.exists("activity.csv")) {
  dlurl <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
  download.file(
    url = dlurl,
    destfile = "repdata%2Fdata%2Factivity.zip",
    mode = "wb"  # binary mode so the zip archive is not corrupted
  )
  unzip(zipfile = "repdata%2Fdata%2Factivity.zip")
}

# Read the CSV into a data frame used by the rest of the analysis.
data <- read.csv(file = "activity.csv")

2. What is mean total number of steps taken per day?

# Total number of steps for each date. The formula interface of aggregate()
# defaults to na.action = na.omit, so rows with missing steps are dropped
# before summing.
steps_by_day <- aggregate(steps ~ date, data = data, FUN = sum)

# Histogram of the daily totals
hist(
  steps_by_day$steps,
  main = "Total steps per day",
  xlab = "Number of Steps",
  col = rgb(1, 0, 0, 0.5)
)

# Mean of the daily totals
rmean <- mean(steps_by_day$steps)
rmean
## [1] 10766.19

# Median of the daily totals
rmedian <- median(steps_by_day$steps)
rmedian
## [1] 10765

3. What is the average daily activity pattern?

# Mean number of steps for each interval value, taken across all dates, drawn
# as a line plot.
steps_by_interval <- aggregate(steps ~ interval, data = data, FUN = mean)
plot(
  x = steps_by_interval$interval,
  y = steps_by_interval$steps,
  type = "l",
  main = "Average # of steps per day by Interval",
  xlab = "5-minute Interval",
  ylab = "Number of Steps"
)

# Interval whose mean step count is the largest
max_interval <- steps_by_interval$interval[which.max(steps_by_interval$steps)]
max_interval
## [1] 835

**Imputing missing values**

# Calculate and report the total number of rows with at least one missing value

NATotal <- sum(!complete.cases(data))
NATotal
## [1] 2304

# Impute each missing step count with the mean of the same interval, computed
# from the rows where steps were recorded.
StepsAvg <- aggregate(steps ~ interval, data = data, FUN = mean)

# match() looks up every affected row's interval in StepsAvg in a single
# vectorized pass. This replaces a row-by-row loop that re-subset StepsAvg and
# grew the result with c() on every iteration (quadratic in nrow(data)), and
# that produced a vector of the wrong length when an interval had no mean.
# Such an interval now simply stays NA.
missing_steps <- is.na(data$steps)
imptNA <- as.numeric(data$steps)
imptNA[missing_steps] <-
  StepsAvg$steps[match(data$interval[missing_steps], StepsAvg$interval)]

# Copy of the original data with the imputed step counts filled in
newdata <- data
newdata$steps <- imptNA

# Make histogram, calculate mean, median and compare with original data

# Daily totals computed from the imputed data
StepsTotal <- aggregate(steps ~ date, data = newdata, FUN = sum, na.rm = TRUE)
hist(
  StepsTotal$steps,
  main = "Total Steps Each Day",
  col = "blue",
  xlab = "Number of Steps"
)

# Overlay the non-imputed daily totals on the same axes to show the difference.
# add = TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
hist(
  steps_by_day$steps,
  main = "Total Steps Each Day",
  col = rgb(1, 0, 0, 0.5),
  xlab = "Number of Steps",
  add = TRUE
)
legend(
  "topright",
  c("Imputed", "Non-imputed"),
  col = c("blue", rgb(1, 0, 0, 0.5)),
  lwd = 10
)

# Mean and median of the imputed daily totals
rmeantotal <- mean(StepsTotal$steps)
rmeantotal
## [1] 10766.19
rmediantotal <- median(StepsTotal$steps)
rmediantotal
## [1] 10766.19

# Change in the median caused by imputation (imputed minus original)
rmediandiff <- rmediantotal - rmedian
rmediandiff
## [1] 1.188679

**Are there differences in activity patterns between weekdays and weekends?**

# Label each row "Weekday" (Monday-Friday) or "Weekend" using the numeric day
# of the week from POSIXlt (wday: 0 = Sunday, 1 = Monday, ..., 6 = Saturday).
# Comparing weekdays() output against English day names depends on the session
# locale (in a non-English locale every day would be labelled "Weekend"), and
# storing those names in a variable called `weekdays` masked base::weekdays().
day_index <- as.POSIXlt(as.Date(newdata$date))$wday
newdata$dow <- as.factor(ifelse(day_index %in% 1:5, "Weekday", "Weekend"))

# Mean steps per interval, computed separately for weekdays and weekends
StepsTotal <- aggregate(steps ~ interval + dow, data = newdata, FUN = mean)

# Two stacked line panels, one per day type
library(lattice)
xyplot(
  steps ~ interval | dow,
  data = StepsTotal,
  main = "Average Steps per Day by Interval",
  xlab = "Interval",
  ylab = "Steps",
  layout = c(1, 2),
  type = "l"
)