# Load the activity data set from the CSV file in the working directory and
# preview its first rows.
mainData <- read.csv(file = "activity.csv", sep = ",")
head(mainData, n = 6L)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
# Distribution of the raw per-interval step counts.
hist(x = mainData$steps)

# Total steps for each date. Because missing values are dropped before
# summing, a date whose step counts are all NA gets a total of 0.
stepsPerDay <- with(mainData, tapply(steps, date, sum, na.rm = TRUE))
head(stepsPerDay, n = 6L)
## 2012-10-01 2012-10-02 2012-10-03 2012-10-04 2012-10-05 2012-10-06
## 0 126 11352 12116 13294 15420
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: splines
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
# Histogram of the daily step totals, using bins 1000 steps wide.
qplot(
  stepsPerDay,
  binwidth = 1000,
  xlab = "Total steps for day",
  ylab = "Frequency of steps"
)

# Mean and median of the daily totals.
meanByDay <- mean(x = stepsPerDay)
medianByDay <- median(x = stepsPerDay)
Mean of total number of steps taken each day is 9354.2295082.
Median of total number of steps taken each day is 10395.
# Mean step count for every 5-minute interval identifier, pooled over all
# dates; missing step counts are dropped before averaging.
averageStepsPerTimeBlock <- aggregate(
  x = list(meanSteps = mainData$steps),
  by = list(interval = mainData$interval),
  FUN = mean,
  na.rm = TRUE
)
head(averageStepsPerTimeBlock, n = 6L)
## interval meanSteps
## 1 0 1.7169811
## 2 5 0.3396226
## 3 10 0.1320755
## 4 15 0.1509434
## 5 20 0.0754717
## 6 25 2.0943396
# Line plot of the mean number of steps in each 5-minute interval. The
# interval identifier is mapped to the x axis and the mean step count to the
# y axis, so the axis titles are assigned to match that mapping.
qplot(
  interval, meanSteps,
  data = averageStepsPerTimeBlock,
  geom = c("line"),
  xlab = "5-min interval",
  ylab = "Average no of steps taken"
)
# Row index of the interval with the highest mean step count.
mostSteps <- which.max(averageStepsPerTimeBlock$meanSteps)

# Render the interval identifier as H:MM, treating it as an hour*100 + minute
# value (e.g. 835 -> "8:35"). Splitting arithmetically also handles
# identifiers below 100 (e.g. 5 -> "0:05"), which a digit-pattern substitution
# requiring three or more digits would leave unformatted.
timeMostSteps <- sprintf(
  "%d:%02d",
  averageStepsPerTimeBlock[mostSteps, "interval"] %/% 100,
  averageStepsPerTimeBlock[mostSteps, "interval"] %% 100
)
Most Steps at: 8:35
# Count the rows whose step count is missing.
missingValues <- sum(is.na(mainData$steps))
Total number of missing values is 2304.
The impute function from the Hmisc package is used here to fill in the missing values.
# Work on a copy so the raw data frame stays untouched.
dataImputed <- mainData

# Replace every missing step count with the mean of the observed step counts
# (impute() comes from the Hmisc package).
dataImputed$steps <- impute(mainData$steps, fun = mean)

# Total steps for each date after imputation.
stepsPerDayImputed <- tapply(dataImputed$steps, dataImputed$date, sum)

# Histogram of the imputed daily totals. The bin width and y-axis title match
# the histogram of the non-imputed totals so the two plots can be compared
# directly.
qplot(
  stepsPerDayImputed,
  xlab = "Total steps per day (Imputed)",
  ylab = "Frequency of steps",
  binwidth = 1000
)
Calculate and report the mean and median
# Median and mean of the daily totals computed from the imputed data.
stepsPerDayMedianImputed <- median(x = stepsPerDayImputed)
stepsPerDayMeanImputed <- mean(x = stepsPerDayImputed)
Mean (Imputed): 1.0766189 × 10^{4}. Median (Imputed): 1.0766189 × 10^{4}.
# Label every observation as "weekend" or "weekday". POSIXlt's wday field
# numbers the days of the week 0-6 starting on Sunday, so 0 and 6 are
# Sunday and Saturday.
dataImputed$dateType <- ifelse(
  as.POSIXlt(dataImputed$date)$wday %in% c(0, 6),
  "weekend",
  "weekday"
)

# Mean step count per 5-minute interval, separately for each day type.
avgdataImputed <- aggregate(
  steps ~ interval + dateType,
  data = dataImputed,
  FUN = mean
)

# One line panel per day type, stacked vertically.
ggplot(avgdataImputed, aes(interval, steps)) +
  geom_line() +
  facet_grid(dateType ~ .) +
  xlab("5-minute interval") +
  ylab("average number of steps")