library(readr)
# Load the activity-monitoring data; readr parses it into the columns
# steps, date and interval (see the column specification echoed below).
# NOTE(review): the path is absolute and machine-specific -- adjust it (or make
# it relative to the project directory) before running on another machine.
activity <- read_csv("C:/Users/FACPAN/Desktop/RepData_PeerAssessment1/activity/activity.csv")
## Parsed with column specification:
## cols(
## steps = col_double(),
## date = col_date(format = ""),
## interval = col_double()
## )
# View() opens a spreadsheet-style viewer, so it is only called in an
# interactive session; batch runs (Rscript, knitting) skip it.
if (interactive()) {
  View(activity)
}
# Print the first rows as a quick sanity check of the parsed data.
head(activity)
## # A tibble: 6 x 3
## steps date interval
## <dbl> <date> <dbl>
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Total number of steps per day. Because of na.rm = TRUE, a day whose readings
# are all NA sums to 0 instead of NA.
total.steps <- tapply(activity$steps, activity$date, FUN = sum, na.rm = TRUE)
# Histogram of the daily totals in bins of 1000 steps. Built with ggplot()
# and geom_histogram() because qplot() is deprecated in current ggplot2.
ggplot(data.frame(total.steps = as.vector(total.steps)), aes(x = total.steps)) +
  geom_histogram(binwidth = 1000) +
  xlab("total number of steps taken each day")
# Mean and median of the daily totals.
mean(total.steps, na.rm = TRUE)
## [1] 9354.23
median(total.steps, na.rm = TRUE)
## [1] 10395
library(ggplot2)
# Mean number of steps for each 5-minute interval, ignoring missing readings.
# The result is a data frame with one row per interval and the columns
# `interval` and `steps`.
averages <- with(
  activity,
  aggregate(
    x = list(steps = steps),
    by = list(interval = interval),
    FUN = mean,
    na.rm = TRUE
  )
)
# Line plot of the average daily activity pattern.
ggplot(averages, aes(x = interval, y = steps)) +
  geom_line() +
  labs(x = "5-minute interval", y = "average number of steps taken")
# Show the row of the interval with the highest average step count.
peak.row <- which.max(averages$steps)
averages[peak.row, ]
## interval steps
## 104 835 206.1698
# Flag every row whose step count is absent, then tally the flagged (TRUE)
# and unflagged (FALSE) rows.
missing <- is.na(activity[["steps"]])
table(missing)
## missing
## FALSE TRUE
## 15264 2304
The total number of missing (NA) values in the dataset is 2304.
Each missing value is filled in with the mean value of its 5-minute interval.
# Replace each missing value with the mean value of its 5-minute interval.
#
# Arguments:
#   steps    - step count(s); may be a single value or a vector, and may be NA.
#   interval - interval identifier(s), one per element of `steps`.
#
# Returns `steps` with every NA replaced by the `steps` value of the row of
# the global `averages` table whose `interval` matches. Values that are not
# NA are returned untouched. An interval that does not occur in `averages`
# yields NA (rather than a zero-length result), so the output always has the
# same length as the input. If `averages` holds several rows for one
# interval, the first one is used.
fill.value <- function(steps, interval) {
  stopifnot(length(steps) == length(interval))
  gaps <- is.na(steps)
  # One vectorised lookup for all missing entries instead of a scan of
  # `averages` per element.
  steps[gaps] <- averages$steps[match(interval[gaps], averages$interval)]
  steps
}
# Work on a copy so the original `activity` data stays untouched.
filled.activity <- activity
# Impute each missing step count element by element via fill.value().
filled.activity$steps <- mapply(fill.value, filled.activity$steps, filled.activity$interval)
# Daily totals of the imputed data; this overwrites the earlier `total.steps`.
total.steps <- tapply(filled.activity$steps, filled.activity$date, FUN = sum)
# Histogram of the daily totals in bins of 1000 steps. Built with ggplot()
# and geom_histogram() because qplot() is deprecated in current ggplot2.
ggplot(data.frame(total.steps = as.vector(total.steps)), aes(x = total.steps)) +
  geom_histogram(binwidth = 1000) +
  xlab("total number of steps taken each day")
# Mean and median of the daily totals after imputation.
mean(total.steps)
## [1] 10766.19
median(total.steps)
## [1] 10766.19
The mean and median are both higher after imputing the missing data. The reason is that in the original data, there are some days on which the steps value is NA for every interval. Because the daily totals were computed with na.rm=TRUE, the total number of steps taken on such days is 0. After replacing the missing steps values with the mean steps of the associated interval, these 0 values no longer appear in the histogram of the total number of steps taken each day.
First, let’s find the day of the week for each measurement in the dataset. In this part, we use the dataset with the filled-in values.
# Classify date(s) as "weekday" (Monday-Friday) or "weekend" (Saturday/Sunday).
#
# Arguments:
#   date - a Date (or anything as.POSIXlt() accepts); may be a vector.
#
# Returns a character vector with one "weekday"/"weekend" entry per element
# of `date`. Stops with "invalid date" if any element has no day of the week
# (e.g. NA).
#
# The day of the week is read from the numeric `wday` field of POSIXlt
# (0 = Sunday ... 6 = Saturday) rather than from weekdays(), whose day names
# depend on the session locale and would not match English names elsewhere.
weekday.or.weekend <- function(date) {
  day.index <- as.POSIXlt(date)$wday
  if (anyNA(day.index)) {
    stop("invalid date")
  }
  c("weekday", "weekend")[1L + (day.index %in% c(0L, 6L))]
}
# Make sure `date` is of class Date before classifying it.
filled.activity$date <- as.Date(filled.activity$date)
# Label every row as "weekday" or "weekend". vapply() is used instead of
# sapply() so the result is guaranteed to be a character vector with exactly
# one label per row.
filled.activity$day <- vapply(
  filled.activity$date,
  FUN = weekday.or.weekend,
  FUN.VALUE = character(1)
)
Now, let’s make a panel plot containing plots of the average number of steps taken on weekdays and weekends.
# Mean steps for every combination of interval and day type, computed from
# `filled.activity`.
# NOTE: this reassigns `averages`, replacing the per-interval means that were
# computed earlier in the script.
averages <- aggregate(
  steps ~ interval + day,
  data = filled.activity,
  FUN = mean
)
# One line-plot panel per day type, stacked vertically.
ggplot(averages, aes(x = interval, y = steps)) +
  geom_line() +
  facet_grid(day ~ .) +
  labs(x = "5-minute interval", y = "Number of steps")