Reproducible Research: Project

Rithesh Kumar

August 2013

Loading and preprocessing the data

# Load the raw activity data.
# The path is relative to the current working directory (knitr evaluates
# chunks in the directory of the source document), so the report no longer
# depends on a machine-specific absolute path set through setwd().
data <- read.csv("activity.csv")

What is mean total number of steps taken per day?

library(ggplot2)

# Keep only the rows that have no missing value in any column.
completedata <- data[complete.cases(data), ]

# Total steps for each date; aggregate() names the summed column "x".
steps_per_day <- aggregate(
  completedata$steps,
  by = list(date = completedata$date),
  FUN = sum
)

# Histogram of the daily totals.
qplot(
  steps_per_day$x,
  geom = "histogram",
  xlab = "Total No. Of Steps",
  ylab = "Count"
)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-2

# Mean and median of the daily step totals (second column of steps_per_day).
meansteps <- mean(steps_per_day[[2]])
mediansteps <- median(steps_per_day[[2]])

Mean total number of steps taken per day = 1.0766 × 10^4
Median total number of steps taken per day = 10765

What is the average daily activity pattern?

# Average number of steps for each 5-minute interval across all days.
steps_per_interval <- aggregate(
  completedata$steps,
  by = list(interval = completedata$interval),
  FUN = mean,
  na.rm = TRUE
)

# aggregate() calls the value column "x"; give it a descriptive name.
colnames(steps_per_interval)[2] <- "steps"

# Time series of the average steps against the interval identifier.
qplot(
  x = steps_per_interval$interval,
  y = steps_per_interval$steps,
  geom = "line",
  ylab = "Average Steps Taken Per Day",
  xlab = "5 Minute Interval"
)

plot of chunk unnamed-chunk-4

# Interval identifier(s) whose average step count equals the overall maximum.
max_interval <- with(
  steps_per_interval,
  interval[which(steps == max(steps))]
)

The 5-minute interval that records the maximum number of steps on average across all the days in the dataset is 835

Imputing missing values

library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
# Count the rows that contain at least one NA.
# complete.cases() is TRUE for rows WITHOUT missing values, so it has to be
# negated before summing; summing it directly counts the complete rows.
tmissing <- sum(!complete.cases(data))

Total number of missing values in the dataset is 2304

Creating a dataset with missing values imputed with average value in the 5-minute interval

# Build a copy of the data in which every missing step count is replaced by
# the mean step count of the same 5-minute interval (computed over the
# observed, non-missing values).
tidydata <- data

# Row indices whose step count is missing. Only the steps column is imputed,
# so the test is made on that column directly.
missvalues <- which(is.na(data$steps))

# One mean per interval, named by the interval identifier.
interval_means <- tapply(data$steps, data$interval, mean, na.rm = TRUE)

# Vectorised lookup: no loop is needed, and nothing happens when there are no
# missing values (the former 1:length(missvalues) loop would iterate over
# c(1, 0) in that case).
tidydata$steps[missvalues] <- as.vector(
  interval_means[as.character(data$interval[missvalues])]
)

Plotting histogram and reporting mean and median total number of steps taken per day after imputing missing values.

# Total steps for each date, now computed from the imputed data set.
steps_per_day <- with(
  tidydata,
  aggregate(steps, by = list(date = date), FUN = sum)
)

# Histogram of the daily totals; axis labels match the first histogram so the
# two plots can be compared directly (without them the x axis would show the
# raw R expression).
qplot(
  steps_per_day[, 2],
  geom = "histogram",
  xlab = "Total No. Of Steps",
  ylab = "Count"
)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-7

# Mean and median of the daily step totals after imputation
# (second column of steps_per_day).
newmeansteps <- mean(steps_per_day[[2]])
newmediansteps <- median(steps_per_day[[2]])

Mean total number of steps taken per day (after imputing missing values) = 1.0766 × 10^4
Median total number of steps taken per day (after imputing missing values) = 10766.19

Change in the mean total number of steps taken per day after imputation = 0

Change in the median total number of steps taken per day after imputation = 1.1887

Are there differences in activity patterns between weekdays and weekends?

Creating a factor variable indicating whether a given date is a weekday or a weekend day

# Classify every observation date as "weekday" or "weekend".
# weekdays() returns day names in the session locale, so comparing them with
# the English strings "Saturday"/"Sunday" would label every day "weekday" in
# a non-English locale. The wday field of POSIXlt is numeric
# (0 = Sunday, ..., 6 = Saturday) and therefore locale-independent.
day_index <- as.POSIXlt(as.Date(tidydata$date))$wday
weekday <- ifelse(day_index %in% c(0L, 6L), "weekend", "weekday")

# Dates that could not be parsed stay missing instead of becoming "weekday".
weekday[is.na(day_index)] <- NA

# Store the result as a two-level factor.
weekday <- factor(weekday, levels = c("weekday", "weekend"))

Panel plot containing a time series plot (i.e. type = “l”) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).

library(lattice)

# Average steps per 5-minute interval, computed separately for weekday and
# weekend observations. The grouping vector is passed directly, so tidydata
# does not need a temporary column. aggregate() names the value column "x".
steps_per_interval <- aggregate(
  tidydata$steps,
  by = list(interval = tidydata$interval, weekday = weekday),
  FUN = mean,
  na.rm = TRUE
)

# Two stacked panels (one per day type), each a line plot of the averages.
xyplot(
  x ~ interval | as.factor(weekday),
  data = steps_per_interval,
  type = "l",
  lty = 1,
  layout = c(1, 2),
  ylab = "Average No. Of Steps"
)

plot of chunk unnamed-chunk-10