##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Fetch the activity archive only when it is not already on disk, so that
## re-running the script does not hit the network each time.
if (!file.exists("rawdata.zip")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip",
    destfile = "rawdata.zip",
    mode = "wb" # binary transfer keeps the zip intact on Windows
  )
}
unzip("rawdata.zip")
## Read the CSV a single time; presumably it is the file unpacked just above.
data <- read.csv("activity.csv")
## Total steps per day. Missing readings are dropped by na.rm = TRUE, so a day
## whose readings are all NA ends up with a total of 0.
dat <- data %>%
  group_by(date) %>%
  summarize(total_steps = sum(steps, na.rm = TRUE))
## xlab is spelled out so the axis label stays "total_steps".
hist(
  dat$total_steps,
  main = "Total number of steps taken each day",
  xlab = "total_steps"
)
The mean and median total number of steps taken per day:
## Mean and median of the daily totals, shown as a one-row data frame.
data.frame(
  mean = mean(dat$total_steps),
  median = median(dat$total_steps)
)
## mean median
## 1 9354.23 10395
library(lubridate)
## Mean steps for each 5-minute interval, averaged over all days (NAs dropped).
## The summary is stored first and plotted as a separate step, so `dat` holds
## the table rather than the value returned by plot().
dat <- group_by(data, interval) %>%
  summarize(mean_steps = mean(steps, na.rm = TRUE))
with(
  dat,
  plot(
    interval, mean_steps,
    main = "Average Number of Steps Taken Each Day",
    type = "l",
    xlab = "5-min Interval",
    ylab = "Average Number of Steps"
  )
)
## Interval whose mean step count (over all days) is the largest.
## na.rm belongs to mean(); filter() takes only logical conditions, and
## current dplyr rejects a named argument such as na.rm there.
group_by(data, interval) %>%
  summarize(mean_steps = mean(steps, na.rm = TRUE)) %>%
  filter(mean_steps == max(mean_steps)) %>%
  select(the_interval_maximize_steps = interval) %>%
  as.data.frame()
## the_interval_maximize_steps
## 1 835
## Number of rows whose step count is missing.
length(which(is.na(data$steps)))
## [1] 2304
## Lookup table: mean steps per interval, with missing readings dropped.
## It is converted to a plain data frame because replace_value() indexes it as
## dat[rows, 2] and expects a bare numeric value back.
dat <- data %>%
  group_by(interval) %>%
  summarize(mean_steps = mean(steps, na.rm = TRUE)) %>%
  as.data.frame()
## Replace missing step counts with the mean for the matching interval.
##
## steps          - step count(s); may contain NA. Scalar or vector.
## interval_value - interval identifier(s), parallel to `steps`.
## means          - lookup table with the interval in column `interval` and
##                  the replacement value in its second column. Defaults to
##                  the `dat` table visible where the function is defined.
##
## Returns `steps` with every NA swapped for the looked-up mean. Input that
## contains no NA is returned untouched (type included). An interval that is
## absent from `means` leaves the value as NA.
replace_value <- function(steps, interval_value, means = dat) {
  missing <- is.na(steps)
  if (!any(missing)) {
    return(steps)
  }
  # match() gives the lookup row for each missing entry in one pass.
  rows <- match(interval_value[missing], means$interval)
  steps[missing] <- means[[2L]][rows]
  steps
}
## Fill every missing step count with its interval mean via replace_value().
## The whole column is rebuilt in one assignment instead of being patched
## element by element; seq_len() keeps the empty-data case from iterating
## over c(1, 0).
n <- length(data$steps)
data$steps <- vapply(
  seq_len(n),
  function(i) replace_value(data$steps[i], data$interval[i]),
  numeric(1)
)
## Daily totals recomputed from `data` as it stands at this point.
dat <- data %>%
  group_by(date) %>%
  summarize(total_steps = sum(steps))
## xlab is spelled out so the axis label stays "total_steps".
hist(
  dat$total_steps,
  main = "Total number of steps taken each day",
  xlab = "total_steps"
)
The mean and median total number of steps taken per day.
## Mean and median of the daily totals, shown as a one-row data frame.
with(
  dat,
  data.frame(mean = mean(total_steps), median = median(total_steps))
)
## mean median
## 1 10766.19 10766.19
From the report we can see that both values have increased, and the histogram becomes more concentrated around the center.
## Convert the date column to Date class so weekday logic can be applied.
data$date <- as.Date(data$date)
## Classify dates as "weekend" (Saturday/Sunday) or "workday".
##
## date - a Date (scalar or vector).
##
## Returns a character vector the same length as `date`; NA dates give NA.
## The weekday comes from the numeric POSIXlt `wday` field (0 = Sunday,
## 6 = Saturday) rather than weekdays(), whose day names follow the locale.
day_type <- function(date) {
  wday <- as.POSIXlt(date)$wday
  ifelse(wday == 0L | wday == 6L, "weekend", "workday")
}
## Tag each row as weekend/workday. vapply() pins the result to one string per
## date, so an unexpected return from day_type() fails loudly instead of
## silently changing the column type.
dat <- mutate(
  data,
  type = factor(vapply(date, day_type, character(1)))
)
## Mean steps for every interval within each day type.
plotdat <- dat %>%
  group_by(interval, type) %>%
  summarize(steps = mean(steps))
## Lattice panel plot, one panel per day type stacked vertically. The y value
## is the interval mean minus the overall mean of plotdat$steps.
xyplot(
  steps - mean(steps) ~ interval | type,
  data = plotdat,
  layout = c(1, 2),
  type = "l"
)
## Same data with ggplot2: points plus a smoother, faceted side by side.
## Written with ggplot() layers because qplot() is deprecated.
ggplot(plotdat, aes(interval, steps)) +
  geom_point() +
  geom_smooth() +
  facet_grid(. ~ type)