# Load the raw activity measurements; the relative path is resolved against
# the current working directory.
activityData <- read.csv("activity.csv")

# Per-column summary. Lines starting with `##` are console output echoed from
# an earlier run and are kept for reference only.
summary(activityData)
## steps date interval
## Min. : 0.00 Length:17568 Min. : 0.0
## 1st Qu.: 0.00 Class :character 1st Qu.: 588.8
## Median : 0.00 Mode :character Median :1177.5
## Mean : 37.38 Mean :1177.5
## 3rd Qu.: 12.00 3rd Qu.:1766.2
## Max. :806.00 Max. :2355.0
## NA's :2304

# Exploring the basics of this data: column names and the first few rows.
names(activityData)
## [1] "steps" "date" "interval"
head(activityData)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
# Total number of steps taken on each date.
stepsPerDay <- aggregate(
  steps ~ date,
  data = activityData,
  FUN = sum,
  na.rm = TRUE
)

# Distribution of the daily totals.
hist(stepsPerDay$steps)

# Mean of the daily totals.
meanStepsPerDay <- mean(stepsPerDay$steps)
meanStepsPerDay
## [1] 10766.19

# Median of the daily totals.
medianStepsPerDay <- median(stepsPerDay$steps)
medianStepsPerDay
## [1] 10765
# Time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the
# average number of steps taken, averaged across all days (y-axis).
stepsPerInterval <- aggregate(
  steps ~ interval,
  data = activityData,
  FUN = mean,
  na.rm = TRUE
)
plot(steps ~ interval, data = stepsPerInterval, type = "l")

# Interval whose average step count is the largest.
intervalWithMaxNbSteps <-
  stepsPerInterval$interval[which.max(stepsPerInterval$steps)]
intervalWithMaxNbSteps
## [1] 835
## Imputing missing values
# Total number of missing values in the dataset, i.e. the number of rows whose
# `steps` entry is NA.
totalValuesMissings <- sum(is.na(activityData$steps))
totalValuesMissings
## [1] 2304
# Look up the average step count stored for a given interval.
#
# interval: value compared against stepsPerInterval$interval.
# Returns the `steps` entries of the matching rows of the global
# `stepsPerInterval` table; the result is empty when no row matches.
getMeanStepsPerInterval <- function(interval) {
  matchesInterval <- stepsPerInterval$interval == interval
  stepsPerInterval$steps[matchesInterval]
}
# Impute every missing step count with the mean for the same interval.
# A copy is filled in so that the raw `activityData` stays untouched.
activityDataNoNA <- activityData
missingRows <- which(is.na(activityDataNoNA$steps))
# Skip the assignment entirely when nothing is missing (this also covers an
# empty data frame, where a `1:nrow()` loop would iterate over c(1, 0)).
if (length(missingRows) > 0L) {
  # vapply() demands exactly one numeric mean per missing row, so an interval
  # without a computed average stops with an error instead of leaving a gap.
  activityDataNoNA$steps[missingRows] <- vapply(
    activityDataNoNA$interval[missingRows],
    getMeanStepsPerInterval,
    numeric(1)
  )
}
# Daily step totals recomputed from the imputed data set, then drawn as a
# histogram.
totalStepsPerDayNoNA <- aggregate(
  steps ~ date,
  data = activityDataNoNA,
  FUN = sum
)
hist(totalStepsPerDayNoNA$steps)
# Parse the date strings, then label each observation "weekday" or "weekend".
activityDataNoNA$date <- as.Date(activityDataNoNA$date, format = "%Y-%m-%d")

# as.POSIXlt()$wday is numeric (0 = Sunday ... 6 = Saturday) in every locale.
# weekdays() returns localized day names, so matching them against
# "Saturday"/"Sunday" would label every row "weekday" on a non-English system.
isWeekend <- as.POSIXlt(activityDataNoNA$date)$wday %in% c(0L, 6L)

# FALSE -> element 1 ("weekday"), TRUE -> element 2 ("weekend"); computed for
# the whole column at once instead of one row per loop iteration.
activityDataNoNA$day <- c("weekday", "weekend")[isWeekend + 1L]
# Average steps per interval, computed separately for weekdays and weekends.
# Bare column names in the formula give result columns already named
# interval, day and steps, so no manual renaming is needed afterwards.
stepsByDay <- aggregate(
  steps ~ interval + day,
  data = activityDataNoNA,
  FUN = mean
)

# Panel plot containing a time series plot (i.e. type = "l") of the 5-minute
# interval (x-axis) and the average number of steps taken, averaged across all
# weekday days or weekend days (y-axis). (The assignment's reference plot for
# this step was created using simulated data.)
library(lattice)
xyplot(
  steps ~ interval | day,
  data = stepsByDay,
  type = "l",
  layout = c(1, 2),
  xlab = "Interval",
  ylab = "Number of steps"
)