# Fetch the zipped activity data set and unpack it into the working directory.
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
zip_path <- "./activity.zip"
# Skip the download when the archive is already present so the script can be
# re-run offline. Delete the file to force a fresh download.
if (!file.exists(zip_path)) {
  # The default download method avoids a dependency on an external curl
  # binary; mode = "wb" keeps the archive byte-exact on Windows.
  download.file(URL, zip_path, mode = "wb")
}
unzip(zip_path)
library(knitr)
## Warning: package 'knitr' was built under R version 3.3.2
# Load the raw observations. as.is = TRUE stops read.csv() from converting
# character columns to factors (spelled out because `T` can be reassigned).
data <- read.csv("activity.csv", as.is = TRUE)
# Keep only the rows that have no missing value in any column.
act <- data[complete.cases(data), ]
# Total number of steps per date, computed from the complete rows in `act`.
ag <- aggregate(steps ~ date, data = act, FUN = sum)
# Histogram of the per-date totals.
hist(ag$steps)
# Mean of the per-date totals.
print(mean(ag[["steps"]]))
## [1] 10766.19
# Median of the per-date totals.
print(median(ag[["steps"]]))
## [1] 10765
# Mean number of steps for each interval, computed from the complete rows.
ave_int <- aggregate(steps ~ interval, data = act, FUN = mean)
# Line plot of the mean steps against the interval identifier.
plot(ave_int$interval, ave_int$steps, type = "l")
# Row of `ave_int` holding the largest mean.
max <- which.max(ave_int[["steps"]])
print(paste(
  "The interval",
  ave_int[["interval"]][max],
  "contains the maximum number of steps"
))
## [1] "The interval 835 contains the maximum number of steps"
# Collect the rows of `data` that contain at least one missing value and
# report how many there are.
miss <- data[which(!complete.cases(data)), ]
nrow(miss)
## [1] 2304
Find the NA values and substitute each one with the mean for its interval
# Impute every missing step count with the mean for the same interval,
# looked up in `ave_int`.
missing_steps <- is.na(data$steps)
# Guarding on any() leaves `data` completely untouched (including the column
# type) when nothing is missing, and avoids the 1:nrow(data) pitfall of
# iterating over c(1, 0) on an empty data frame.
if (any(missing_steps)) {
  # match() resolves, for all incomplete rows at once, which row of `ave_int`
  # carries the mean of that row's interval.
  lookup <- match(data$interval[missing_steps], ave_int$interval)
  data$steps[missing_steps] <- ave_int$steps[lookup]
}
# Total number of steps per date, this time from `data` after the missing
# step counts were filled in.
ag_steps <- aggregate(steps ~ date, data = data, FUN = sum)
# Histogram of the per-date totals.
hist(ag_steps$steps)
# Mean of the per-date totals.
print(mean(ag_steps[["steps"]]))
## [1] 10766.19
# Median of the per-date totals.
print(median(ag_steps[["steps"]]))
## [1] 10766.19
So the mean has not changed after imputation (10766.19), but the median has: it rose from 10765 to 10766.19 and now equals the mean
# Classify dates as "Weekday" or "Weekend".
#
# date_val: character vector of dates written as YYYY-MM-DD (a single string
#           works too).
# Returns a character vector of the same length holding "Weekday" or
# "Weekend"; entries that cannot be parsed as a date come back as NA.
week_day <- function(date_val) {
  parsed <- as.Date(date_val, "%Y-%m-%d")
  # Use the numeric day-of-week (0 = Sunday ... 6 = Saturday) instead of
  # weekdays(): day names depend on the session locale, so comparing them with
  # English strings labels every date "Weekday" in a non-English locale.
  day_no <- as.POSIXlt(parsed)$wday
  label <- rep("Weekday", length(day_no))
  label[day_no %in% c(0L, 6L)] <- "Weekend"
  # Propagate unparseable or missing dates rather than guessing a label.
  label[is.na(day_no)] <- NA_character_
  label
}
# Label every observation as "Weekday" or "Weekend". vapply() guarantees one
# string per date (sapply() silently changes its return type on unexpected
# input), and USE.NAMES = FALSE keeps the date strings from being attached to
# the factor as names.
data$day_type <- factor(
  vapply(data$date, week_day, character(1), USE.NAMES = FALSE)
)
library(ggplot2)
# Mean steps per interval, computed separately for each day type.
ag_steps2 <- aggregate(steps ~ interval + day_type, data = data, FUN = mean)
# One panel per day type, stacked vertically on shared axes.
g <- ggplot(ag_steps2, aes(interval, steps)) +
  geom_line(stat = "identity", aes(colour = day_type)) +
  theme_gray() +
  facet_grid(day_type ~ ., scales = "fixed", space = "fixed") +
  labs(x = "Interval", y = expression("No of Steps")) +
  ggtitle("No of steps Per Interval by day type")
print(g)