# Load the activity-monitoring data set.
# The path is resolved relative to the current working directory, so run the
# script from the project root (the folder that contains `data/`) rather than
# forcing a machine-specific directory with setwd().
activity_file <- file.path("data", "activity.csv")
if (!file.exists(activity_file)) {
  stop("Cannot find '", activity_file, "' relative to '", getwd(), "'",
       call. = FALSE)
}
activity <- read.csv(activity_file, header = TRUE)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total number of steps per day. With aggregate()'s default na.action, rows
# whose step count is missing are left out of the sums.
daily_activity <- aggregate(steps ~ date, data = activity, FUN = sum)

# Histogram of the daily totals, using bins that are 2000 steps wide.
ggplot(data = daily_activity, mapping = aes(x = steps)) +
  geom_histogram(binwidth = 2000, colour = "black", fill = "pink") +
  labs(title = "Total number of steps taken each day",
       x = "Daily steps")
# Mean and median of the total number of steps taken per day.
# Any NA among the daily totals is ignored by both summaries.
mean_daily_act <- mean(daily_activity[["steps"]], na.rm = TRUE)
median_daily_act <- median(daily_activity[["steps"]], na.rm = TRUE)
# Average number of steps for each 5-minute interval, taken across all days.
min_inter_steps <- aggregate(steps ~ interval, data = activity, FUN = mean)

# Line plot of the average daily activity pattern.
ggplot(data = min_inter_steps, mapping = aes(x = interval, y = steps)) +
  geom_line(colour = "purple") +
  labs(title = "5 min interval plot",
       x = "5 min intervals",
       y = "average steps")

# Identifier of the interval (not the step count) where the average peaks.
max_steps <- min_inter_steps$interval[which.max(min_inter_steps$steps)]
# Number of rows whose step count is missing in the raw data.
sum(is.na(activity$steps))
## [1] 2304

# Impute every missing step count with the mean of its 5-minute interval.
# match() looks up each missing row's interval in the per-interval averages in
# a single vectorised pass. This replaces a row-by-row loop over 1:nrow(),
# which is slow and would iterate over c(1, 0) on an empty data frame.
mod_activity <- activity
missing_steps <- is.na(mod_activity$steps)
interval_row <- match(mod_activity$interval[missing_steps],
                      min_inter_steps$interval)
mod_activity$steps[missing_steps] <- min_inter_steps$steps[interval_row]
# Inspect the imputed data set: overall summary, then first and last rows.
summary(mod_activity)
## steps date interval
## Min. : 0.00 2012-10-01: 288 Min. : 0.0
## 1st Qu.: 0.00 2012-10-02: 288 1st Qu.: 588.8
## Median : 0.00 2012-10-03: 288 Median :1177.5
## Mean : 37.38 2012-10-04: 288 Mean :1177.5
## 3rd Qu.: 27.00 2012-10-05: 288 3rd Qu.:1766.2
## Max. :806.00 2012-10-06: 288 Max. :2355.0
## (Other) :15840
head(mod_activity)
## steps date interval
## 1 1.7169811 2012-10-01 0
## 2 0.3396226 2012-10-01 5
## 3 0.1320755 2012-10-01 10
## 4 0.1509434 2012-10-01 15
## 5 0.0754717 2012-10-01 20
## 6 2.0943396 2012-10-01 25
tail(mod_activity)
## steps date interval
## 17563 2.6037736 2012-11-30 2330
## 17564 4.6981132 2012-11-30 2335
## 17565 3.3018868 2012-11-30 2340
## 17566 0.6415094 2012-11-30 2345
## 17567 0.2264151 2012-11-30 2350
## 17568 1.0754717 2012-11-30 2355
# Confirm the imputation worked: count the NAs left in the imputed copy.
# (Counting them in the untouched `activity` frame would only repeat the
# original 2304 and verify nothing.)
sum(is.na(mod_activity$steps))
## [1] 0
# Total number of steps per day, recomputed from the imputed data.
mod_daily_activity <- aggregate(steps ~ date, data = mod_activity, FUN = sum)

# Histogram of the imputed daily totals, using bins that are 2000 steps wide.
ggplot(data = mod_daily_activity, mapping = aes(x = steps)) +
  geom_histogram(binwidth = 2000, colour = "black", fill = "pink") +
  labs(title = "Total number of steps taken each day",
       x = "Daily steps")

# Mean and median of the imputed daily totals.
mod_mean_daily_act <- mean(mod_daily_activity[["steps"]], na.rm = TRUE)
mod_median_daily_act <- median(mod_daily_activity[["steps"]], na.rm = TRUE)
# Compare the daily mean and median before and after imputation. Each pair
# prints the value from the raw data first, then the value from the imputed
# data. In the recorded output the mean is unchanged, while the median moves
# from 10765 to 10766.19 (equal to the mean).
mean_daily_act
## [1] 10766.19
mod_mean_daily_act
## [1] 10766.19
median_daily_act
## [1] 10765
mod_median_daily_act
## [1] 10766.19
# Label every observation as "Weekday" or "Weekend".
# POSIXlt's `wday` field is numeric (0 = Sunday, ..., 6 = Saturday), so this
# test does not depend on the session locale. The day names returned by
# weekdays() are translated, so matching "Saturday"/"Sunday" silently labels
# everything "Weekday" in a non-English locale.
# Explicit levels keep the factor layout stable even if one group is absent.
mod_activity$week <- factor(
  ifelse(as.POSIXlt(as.Date(mod_activity$date))$wday %in% c(0L, 6L),
         "Weekend", "Weekday"),
  levels = c("Weekday", "Weekend")
)
str(mod_activity)
## 'data.frame': 17568 obs. of 4 variables:
## $ steps : num 1.717 0.3396 0.1321 0.1509 0.0755 ...
## $ date : Factor w/ 61 levels "2012-10-01","2012-10-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ interval: int 0 5 10 15 20 25 30 35 40 45 ...
## $ week : Factor w/ 2 levels "Weekday","Weekend": 1 1 1 1 1 1 1 1 1 1 ...
# Mean steps per 5-minute interval, computed separately for weekdays and
# weekends from the imputed data.
aggr_activity <- aggregate(steps ~ interval + week, data = mod_activity,
                           FUN = mean)

# One panel per day type (facet rows), so the two patterns can be compared.
# The y-axis label previously read "avarage"; the spelling is corrected here.
ggplot(aggr_activity, aes(x = interval, y = steps)) +
  geom_line() +
  facet_grid(week ~ .) +
  xlab("5-minute interval") +
  ylab("average number of steps")