========================================================
1. Loading and preprocessing data
dat <- read.csv("activity.csv")
dat$date <- as.Date(as.character(dat$date), format = "%Y-%m-%d")
2. Total Number of steps per day
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
sub1 <- subset(dat, steps >= 0, select = c(steps, date))
grouped <- group_by(sub1, date)
Totals <- summarise(grouped, Total.Steps = sum(steps, na.rm = TRUE))
kable(Totals, caption = "Total number of steps per day", align = "l", format = "markdown")
| date | Total.Steps |
|---|---|
| 2012-10-02 | 126 |
| 2012-10-03 | 11352 |
| 2012-10-04 | 12116 |
| 2012-10-05 | 13294 |
| 2012-10-06 | 15420 |
| 2012-10-07 | 11015 |
| 2012-10-09 | 12811 |
| 2012-10-10 | 9900 |
| 2012-10-11 | 10304 |
| 2012-10-12 | 17382 |
| 2012-10-13 | 12426 |
| 2012-10-14 | 15098 |
| 2012-10-15 | 10139 |
| 2012-10-16 | 15084 |
| 2012-10-17 | 13452 |
| 2012-10-18 | 10056 |
| 2012-10-19 | 11829 |
| 2012-10-20 | 10395 |
| 2012-10-21 | 8821 |
| 2012-10-22 | 13460 |
| 2012-10-23 | 8918 |
| 2012-10-24 | 8355 |
| 2012-10-25 | 2492 |
| 2012-10-26 | 6778 |
| 2012-10-27 | 10119 |
| 2012-10-28 | 11458 |
| 2012-10-29 | 5018 |
| 2012-10-30 | 9819 |
| 2012-10-31 | 15414 |
| 2012-11-02 | 10600 |
| 2012-11-03 | 10571 |
| 2012-11-05 | 10439 |
| 2012-11-06 | 8334 |
| 2012-11-07 | 12883 |
| 2012-11-08 | 3219 |
| 2012-11-11 | 12608 |
| 2012-11-12 | 10765 |
| 2012-11-13 | 7336 |
| 2012-11-15 | 41 |
| 2012-11-16 | 5441 |
| 2012-11-17 | 14339 |
| 2012-11-18 | 15110 |
| 2012-11-19 | 8841 |
| 2012-11-20 | 4472 |
| 2012-11-21 | 12787 |
| 2012-11-22 | 20427 |
| 2012-11-23 | 21194 |
| 2012-11-24 | 14478 |
| 2012-11-25 | 11834 |
| 2012-11-26 | 11162 |
| 2012-11-27 | 13646 |
| 2012-11-28 | 10183 |
| 2012-11-29 | 7047 |
hist(Totals$Total.Steps, xlab = "Total steps", main = "Total number of steps", col = "blue", breaks = 8)
mean.med <- summarise(grouped, Mean = round(mean(steps, na.rm = TRUE)), Median = median(steps, na.rm = TRUE))
kable(mean.med, caption = "Mean and Median steps per day",align = "c", format = "markdown")
| date | Mean | Median |
|---|---|---|
| 2012-10-02 | 0 | 0 |
| 2012-10-03 | 39 | 0 |
| 2012-10-04 | 42 | 0 |
| 2012-10-05 | 46 | 0 |
| 2012-10-06 | 54 | 0 |
| 2012-10-07 | 38 | 0 |
| 2012-10-09 | 44 | 0 |
| 2012-10-10 | 34 | 0 |
| 2012-10-11 | 36 | 0 |
| 2012-10-12 | 60 | 0 |
| 2012-10-13 | 43 | 0 |
| 2012-10-14 | 52 | 0 |
| 2012-10-15 | 35 | 0 |
| 2012-10-16 | 52 | 0 |
| 2012-10-17 | 47 | 0 |
| 2012-10-18 | 35 | 0 |
| 2012-10-19 | 41 | 0 |
| 2012-10-20 | 36 | 0 |
| 2012-10-21 | 31 | 0 |
| 2012-10-22 | 47 | 0 |
| 2012-10-23 | 31 | 0 |
| 2012-10-24 | 29 | 0 |
| 2012-10-25 | 9 | 0 |
| 2012-10-26 | 24 | 0 |
| 2012-10-27 | 35 | 0 |
| 2012-10-28 | 40 | 0 |
| 2012-10-29 | 17 | 0 |
| 2012-10-30 | 34 | 0 |
| 2012-10-31 | 54 | 0 |
| 2012-11-02 | 37 | 0 |
| 2012-11-03 | 37 | 0 |
| 2012-11-05 | 36 | 0 |
| 2012-11-06 | 29 | 0 |
| 2012-11-07 | 45 | 0 |
| 2012-11-08 | 11 | 0 |
| 2012-11-11 | 44 | 0 |
| 2012-11-12 | 37 | 0 |
| 2012-11-13 | 25 | 0 |
| 2012-11-15 | 0 | 0 |
| 2012-11-16 | 19 | 0 |
| 2012-11-17 | 50 | 0 |
| 2012-11-18 | 52 | 0 |
| 2012-11-19 | 31 | 0 |
| 2012-11-20 | 16 | 0 |
| 2012-11-21 | 44 | 0 |
| 2012-11-22 | 71 | 0 |
| 2012-11-23 | 74 | 0 |
| 2012-11-24 | 50 | 0 |
| 2012-11-25 | 41 | 0 |
| 2012-11-26 | 39 | 0 |
| 2012-11-27 | 47 | 0 |
| 2012-11-28 | 35 | 0 |
| 2012-11-29 | 24 | 0 |
3. Average daily activity
library(ggplot2)
sub2 <- subset(dat, steps >= 0, select = c(steps, interval))
grouped2 <- group_by(sub2, interval)
daily_interval_avg <- summarise(grouped2, Avg.Steps = mean(steps))
g <- ggplot(daily_interval_avg, aes(interval, Avg.Steps))
g+geom_line(colour = "purple")+ggtitle("Daily average steps taken per inteval")+ ylab("Average Steps")
i <- which.max(daily_interval_avg$Avg.Steps)
daily_interval_avg$interval[i]
## [1] 835
The interval with maximun number of steps averaged by all days is 835
4. Missing values
icc <- !complete.cases(dat)
Nas <- sum(icc)
There are 2304 rows with missing values
dat1 <- dat
m <- round(mean(dat1[,1], na.rm = TRUE))
for(i in 1: nrow(dat1)){
if (is.na(dat1[i,1]) == TRUE){
dat1[i,1] <- m
}else{
dat1[i,1] <- dat1[i,1]
}
}
Totals1 <- summarise(group_by(subset(dat1, select = c(steps,date)),date), Total.Steps = sum(steps, na.rm = TRUE))
hist(Totals1$Total.Steps, xlab = "Total steps", main = "Total number of steps", col = "brown", breaks = 8)
mean.med1 <- summarise(group_by(subset(dat1, select = c(steps,date)),date), Mean = round(mean(steps, na.rm = TRUE)), Median = median(steps, na.rm = TRUE))
kable(mean.med1, caption = "Mean and Median steps per day", align = "c", format = "markdown")
| date | Mean | Median |
|---|---|---|
| 2012-10-01 | 37 | 37 |
| 2012-10-02 | 0 | 0 |
| 2012-10-03 | 39 | 0 |
| 2012-10-04 | 42 | 0 |
| 2012-10-05 | 46 | 0 |
| 2012-10-06 | 54 | 0 |
| 2012-10-07 | 38 | 0 |
| 2012-10-08 | 37 | 37 |
| 2012-10-09 | 44 | 0 |
| 2012-10-10 | 34 | 0 |
| 2012-10-11 | 36 | 0 |
| 2012-10-12 | 60 | 0 |
| 2012-10-13 | 43 | 0 |
| 2012-10-14 | 52 | 0 |
| 2012-10-15 | 35 | 0 |
| 2012-10-16 | 52 | 0 |
| 2012-10-17 | 47 | 0 |
| 2012-10-18 | 35 | 0 |
| 2012-10-19 | 41 | 0 |
| 2012-10-20 | 36 | 0 |
| 2012-10-21 | 31 | 0 |
| 2012-10-22 | 47 | 0 |
| 2012-10-23 | 31 | 0 |
| 2012-10-24 | 29 | 0 |
| 2012-10-25 | 9 | 0 |
| 2012-10-26 | 24 | 0 |
| 2012-10-27 | 35 | 0 |
| 2012-10-28 | 40 | 0 |
| 2012-10-29 | 17 | 0 |
| 2012-10-30 | 34 | 0 |
| 2012-10-31 | 54 | 0 |
| 2012-11-01 | 37 | 37 |
| 2012-11-02 | 37 | 0 |
| 2012-11-03 | 37 | 0 |
| 2012-11-04 | 37 | 37 |
| 2012-11-05 | 36 | 0 |
| 2012-11-06 | 29 | 0 |
| 2012-11-07 | 45 | 0 |
| 2012-11-08 | 11 | 0 |
| 2012-11-09 | 37 | 37 |
| 2012-11-10 | 37 | 37 |
| 2012-11-11 | 44 | 0 |
| 2012-11-12 | 37 | 0 |
| 2012-11-13 | 25 | 0 |
| 2012-11-14 | 37 | 37 |
| 2012-11-15 | 0 | 0 |
| 2012-11-16 | 19 | 0 |
| 2012-11-17 | 50 | 0 |
| 2012-11-18 | 52 | 0 |
| 2012-11-19 | 31 | 0 |
| 2012-11-20 | 16 | 0 |
| 2012-11-21 | 44 | 0 |
| 2012-11-22 | 71 | 0 |
| 2012-11-23 | 74 | 0 |
| 2012-11-24 | 50 | 0 |
| 2012-11-25 | 41 | 0 |
| 2012-11-26 | 39 | 0 |
| 2012-11-27 | 47 | 0 |
| 2012-11-28 | 35 | 0 |
| 2012-11-29 | 24 | 0 |
| 2012-11-30 | 37 | 37 |
5. Weekdays Vs Weekends
dat1 <- mutate(dat1, Weekday = weekdays(dat1$date))
for(i in 1:nrow(dat1)){
if(dat1[i,4] == "Saturday" | dat1[i,4] == "Sunday"){
dat1[i,4] <- "weekend"
}else if(dat1[i,4] == "Monday" | dat1[i,4] == "Tuesday" | dat1[i,4] == "Wednesday" | dat1[i,4] == "Thursday" | dat1[i,4] == "Friday"){
dat1[i,4] <- "weekday"
}
}
dat1$Weekday <- as.factor(dat1$Weekday)
sub3 <- subset(dat1, select = c(steps, interval, Weekday))
grouped3 <- group_by(sub3, Weekday, interval)
interval_Avg.Steps <- summarise(grouped3, Avg.Steps = mean(steps))
g <- ggplot(interval_Avg.Steps, aes(interval, Avg.Steps))
g <- g + geom_line(colour = "brown") +facet_grid(Weekday ~.)
g <- g+ xlab("Interval") + ylab("Number of Steps") + ggtitle("5-minute interval average number of steps taken")
g