Data Load

# Read the activity records from the CSV file under ./Data
data <- read.csv(file = "./Data/activity.csv", header = TRUE)

Sum steps by day

# Total steps per date (the formula method of aggregate() omits NA rows by
# default, so dates without any recorded steps do not appear in the result)
stepsSum <- aggregate(steps ~ date, data = data, FUN = sum)

# Histogram of the daily step totals
hist(
  stepsSum$steps,
  breaks = 20,
  col = "orange",
  main = "Total Steps Per Day",
  xlab = "Number of Steps"
)

# Mean of the daily step totals
stepsMean <- mean(stepsSum[["steps"]])
print(stepsMean)
## [1] 10766.19

# Median of the daily step totals
stepsMedian <- median(stepsSum[["steps"]])
print(stepsMedian)
## [1] 10765
# Mean number of steps in each interval, taken across all days
stepsIntervalMean <- aggregate(steps ~ interval, data = data, FUN = mean)

# Line plot of the per-interval averages
plot(
  x = stepsIntervalMean$interval,
  y = stepsIntervalMean$steps,
  type = "l",
  main = "Average Number of Steps per day by Interval",
  xlab = "5-minute Interval",
  ylab = "Number of Steps"
)

# Interval whose average step count is the largest
maxInterval <- with(stepsIntervalMean, interval[which.max(steps)])
print(maxInterval)
## [1] 835

Calculate the total number of missing values in the dataset

# Number of rows whose steps value is missing
dataNA <- sum(is.na(data[["steps"]]))
print(dataNA)
## [1] 2304
# Impute every missing steps value with the mean for the same interval;
# match() looks up each row's interval in the per-interval averages
dataNoNa <- data
dataNoNa$steps <- ifelse(
  is.na(data$steps),
  stepsIntervalMean$steps[match(data$interval, stepsIntervalMean$interval)],
  data$steps
)

# Daily step totals recomputed from the imputed data
stepsNoNaSum <- aggregate(steps ~ date, data = dataNoNa, FUN = sum)
print(stepsNoNaSum)
##          date    steps
## 1  2012-10-01 10766.19
## 2  2012-10-02   126.00
## 3  2012-10-03 11352.00
## 4  2012-10-04 12116.00
## 5  2012-10-05 13294.00
## 6  2012-10-06 15420.00
## 7  2012-10-07 11015.00
## 8  2012-10-08 10766.19
## 9  2012-10-09 12811.00
## 10 2012-10-10  9900.00
## 11 2012-10-11 10304.00
## 12 2012-10-12 17382.00
## 13 2012-10-13 12426.00
## 14 2012-10-14 15098.00
## 15 2012-10-15 10139.00
## 16 2012-10-16 15084.00
## 17 2012-10-17 13452.00
## 18 2012-10-18 10056.00
## 19 2012-10-19 11829.00
## 20 2012-10-20 10395.00
## 21 2012-10-21  8821.00
## 22 2012-10-22 13460.00
## 23 2012-10-23  8918.00
## 24 2012-10-24  8355.00
## 25 2012-10-25  2492.00
## 26 2012-10-26  6778.00
## 27 2012-10-27 10119.00
## 28 2012-10-28 11458.00
## 29 2012-10-29  5018.00
## 30 2012-10-30  9819.00
## 31 2012-10-31 15414.00
## 32 2012-11-01 10766.19
## 33 2012-11-02 10600.00
## 34 2012-11-03 10571.00
## 35 2012-11-04 10766.19
## 36 2012-11-05 10439.00
## 37 2012-11-06  8334.00
## 38 2012-11-07 12883.00
## 39 2012-11-08  3219.00
## 40 2012-11-09 10766.19
## 41 2012-11-10 10766.19
## 42 2012-11-11 12608.00
## 43 2012-11-12 10765.00
## 44 2012-11-13  7336.00
## 45 2012-11-14 10766.19
## 46 2012-11-15    41.00
## 47 2012-11-16  5441.00
## 48 2012-11-17 14339.00
## 49 2012-11-18 15110.00
## 50 2012-11-19  8841.00
## 51 2012-11-20  4472.00
## 52 2012-11-21 12787.00
## 53 2012-11-22 20427.00
## 54 2012-11-23 21194.00
## 55 2012-11-24 14478.00
## 56 2012-11-25 11834.00
## 57 2012-11-26 11162.00
## 58 2012-11-27 13646.00
## 59 2012-11-28 10183.00
## 60 2012-11-29  7047.00
## 61 2012-11-30 10766.19
# Histogram of the daily step totals computed from the imputed data
hist(
  stepsNoNaSum$steps,
  breaks = 20,
  col = "orange",
  main = "Total Steps Each Day - No NA",
  xlab = "Number of Steps"
)

# Mean of the daily step totals after imputation
NoNaMean <- mean(stepsNoNaSum[["steps"]])
print(NoNaMean)
## [1] 10766.19

# Median of the daily step totals after imputation
NoNaMedian <- median(stepsNoNaSum[["steps"]])
print(NoNaMedian)
## [1] 10766.19
# Differences between the summaries of the original and the imputed data
# (each is original minus imputed)

# Difference in mean daily steps
diffMean <- stepsMean - NoNaMean
print(diffMean)
## [1] 0

# Difference in median daily steps
diffMedian <- stepsMedian - NoNaMedian
print(diffMedian)
## [1] -1.188679

# Difference in the overall number of steps
diffTotal <- sum(stepsSum[["steps"]]) - sum(stepsNoNaSum[["steps"]])
print(diffTotal)
## [1] -86129.51

Create a new factor field indicating whether each date is a weekday or a weekend day

# English weekend day names. Retained because code outside this section may
# still reference `weekend`; the classification below does not depend on it.
weekend <- c("Saturday", "Sunday")

# Label every record "Weekend" or "Weekday".
# weekdays() returns day names in the session locale, so comparing them with
# the English names above would mark every row "Weekday" under a non-English
# locale. The POSIXlt `wday` field is numeric (0 = Sunday, 6 = Saturday) and
# is independent of the locale.
dataNoNa$dow <- as.factor(ifelse(
  as.POSIXlt(as.Date(dataNoNa$date))$wday %in% c(0L, 6L),
  "Weekend",
  "Weekday"
))

# Mean steps for each interval within each day type
stepsbyIntervalDow <- aggregate(
  steps ~ interval + dow,
  data = dataNoNa,
  FUN = mean
)

# Panel plot: one line series of average steps by interval per day type.
# The plotted values are the per-interval means held in stepsbyIntervalDow,
# so the y axis is labelled as an average rather than a total.
library(lattice)
xyplot(
  steps ~ interval | dow,
  data = stepsbyIntervalDow,
  type = "l",
  layout = c(2, 1),
  main = "Average Steps taken per Day by Interval",
  xlab = " 5-minute interval",
  ylab = "Average number of Steps"
)