---
title: "Reproducible Research: Peer Assessment 1"
output:
  html_document:
    keep_md: true
---

Loading and preprocessing the data
library(knitr)
## Warning: package 'knitr' was built under R version 3.2.2
library(datasets) 
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.2
# Load the activity data set. The full path is handed to read.csv() instead of
# calling setwd(): changing the working directory is a global side effect on
# everything that runs afterwards, and knitr's documentation advises against
# setwd() inside code chunks. An explicit path only affects this one read.
data <- read.csv(file.path("~/RepData_PeerAssessment1", "activity.csv"))
What is mean total number of steps taken per day?
library(ggplot2)
# Total steps per day. With na.rm = TRUE a day whose readings are all NA sums
# to 0 rather than NA, so such days enter the statistics below as 0 steps.
total.steps <- tapply(data$steps, data$date, FUN = sum, na.rm = TRUE)
# Histogram of the daily totals in 1000-step bins. Built with ggplot() +
# geom_histogram() because qplot() is deprecated in current ggplot2 releases;
# as.vector() drops the array attributes tapply() attaches to its result.
ggplot(data.frame(total.steps = as.vector(total.steps)),
       aes(x = total.steps)) +
    geom_histogram(binwidth = 1000) +
    xlab("total number of steps taken each day")

# Mean and median of the daily totals; any NA total is skipped.
mean(total.steps, na.rm = TRUE)
## [1] 9354.23
median(total.steps, na.rm = TRUE)
## [1] 10395
What is mean total number of steps taken per day?
# Summary statistics of the daily step totals; NA totals are excluded from
# both the mean and the median.
mean(x = total.steps, na.rm = TRUE)
## [1] 9354.23
median(x = total.steps, na.rm = TRUE)
## [1] 10395
What is the average daily activity pattern?
library(ggplot2)
# Mean number of steps for each 5-minute interval across all days, with NA
# readings left out of each mean.
averages <- with(data, aggregate(x = list(steps = steps),
                                 by = list(interval = interval),
                                 FUN = mean, na.rm = TRUE))
# Line plot of the average daily activity pattern.
ggplot(averages, aes(x = interval, y = steps)) +
    geom_line() +
    labs(x = "5-minute interval", y = "average number of steps taken")

Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
# Show the row of `averages` whose mean step count is the largest.
averages[which.max(averages[["steps"]]), ]
##     interval    steps
## 104      835 206.1698
Imputing missing values
# Flag every observation whose step count is NA.
missing <- is.na(data[["steps"]])
# Count recorded (FALSE) versus missing (TRUE) observations.
table(missing)
## missing
## FALSE  TRUE 
## 15264  2304
Devise a strategy for filling in all of the missing values in the dataset. The strategy does not need to be sophisticated. Here, each missing value is replaced with the mean value of its 5-minute interval.
#' Fill in a single missing step count
#'
#' Returns `steps` unchanged when it is recorded; otherwise returns the mean
#' step count stored for `interval` in `interval.means`.
#'
#' @param steps A single step count, possibly `NA`.
#' @param interval The 5-minute interval identifier that `steps` belongs to.
#' @param interval.means Data frame with columns `interval` and `steps`
#'   holding the mean step count per interval. Defaults to the global
#'   `averages`, which the function previously read implicitly. The default
#'   is evaluated at call time, so pass the table explicitly whenever
#'   `averages` may have been reassigned to something else.
#' @return A length-one value: `steps` itself, the interval mean, or `NA`
#'   when `interval` has no entry in `interval.means`.
fill.value <- function(steps, interval, interval.means = averages) {
    if (!is.na(steps)) {
        return(steps)
    }
    # match() yields NA for an unknown interval, so the lookup returns NA
    # rather than a zero-length vector; every call therefore has length one
    # and mapply() can always simplify its results to a plain vector.
    interval.means$steps[match(interval, interval.means$interval)]
}
# Work on a copy so the raw observations in `data` stay untouched, then
# replace the step column with the result of calling fill.value() once per
# (steps, interval) pair.
filled.data <- data
filled.data[["steps"]] <- mapply(FUN = fill.value,
                                 data$steps, data$interval)
Make a histogram of the total number of steps taken each day, and calculate and report the mean and median total number of steps taken per day.
# Total steps per day on the imputed data. na.rm is deliberately not set, so
# any NA left after imputation would surface here as an NA daily total.
total.steps <- tapply(filled.data$steps, filled.data$date, FUN = sum)
# Histogram of the daily totals in 1000-step bins. Built with ggplot() +
# geom_histogram() because qplot() is deprecated in current ggplot2 releases;
# as.vector() drops the array attributes tapply() attaches to its result.
ggplot(data.frame(total.steps = as.vector(total.steps)),
       aes(x = total.steps)) +
    geom_histogram(binwidth = 1000) +
    xlab("total number of steps taken each day")

# Mean and median of the daily totals after imputation.
mean(total.steps)
## [1] 10766.19
median(total.steps)
## [1] 10766.19
Are there differences in activity patterns between weekdays and weekends?
#' Classify a date as "weekday" or "weekend"
#'
#' The day of the week is taken from the numeric `wday` field of `POSIXlt`
#' (0 = Sunday ... 6 = Saturday) rather than from `weekdays()`, whose day
#' names follow the session locale and therefore never match the English
#' names when the locale is not English.
#'
#' @param date A single `Date`.
#' @return `"weekday"` for Monday to Friday, `"weekend"` for Saturday or
#'   Sunday. Stops with "invalid date" when `date` is `NA`.
weekday.or.weekend <- function(date) {
    day <- as.POSIXlt(date)$wday
    if (is.na(day)) {
        stop("invalid date")
    }
    if (day %in% c(0L, 6L)) "weekend" else "weekday"
}
# Convert the date column to Date so the day of the week can be derived.
filled.data$date <- as.Date(filled.data$date)
# vapply() instead of sapply(): the result is always a character vector with
# one label per row (also for zero rows), and a classifier result that is not
# a single string fails here instead of silently producing a list column.
filled.data$day <- vapply(filled.data$date, weekday.or.weekend,
                          FUN.VALUE = character(1), USE.NAMES = FALSE)
Make a panel plot containing a time series plot (i.e. type = “l”) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).
# Mean steps per 5-minute interval, computed separately for each value of
# `day`. Note that this reassigns the `averages` object.
averages <- aggregate(steps ~ interval + day, data = filled.data, FUN = mean)
# One line-plot panel per `day` value, stacked vertically.
ggplot(data = averages, mapping = aes(x = interval, y = steps)) +
    geom_line() +
    facet_grid(day ~ .) +
    labs(x = "5-minute interval", y = "Number of steps")