library(knitr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# opts_chunk$set(echo = TRUE)

# Switch to the author's local project folder only when it exists, so the
# script does not abort on other machines; activity.csv is then expected in
# the current working directory.
project_dir <- "C:\\DS\\Week2\\RepData_PeerAssessment1-master"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

Loading and preprocessing the data

# Read the raw activity log into `dr`; `activity_data` keeps only the rows
# whose step count is present.
dr <- read.csv('activity.csv')
activity_data <- dr[!is.na(dr$steps), ]

What is the mean total number of steps taken per day?

# Total steps per date, computed from the rows that have a step count.
activity_data_by_day <- activity_data %>%
  group_by(date)
stepsByDay <- activity_data_by_day %>%
  summarise(total = sum(steps))
stepsByDay
## # A tibble: 53 x 2
##    date       total
##    <fct>      <int>
##  1 2012-10-02   126
##  2 2012-10-03 11352
##  3 2012-10-04 12116
##  4 2012-10-05 13294
##  5 2012-10-06 15420
##  6 2012-10-07 11015
##  7 2012-10-09 12811
##  8 2012-10-10  9900
##  9 2012-10-11 10304
## 10 2012-10-12 17382
## # ... with 43 more rows
# Distribution of the daily step totals.
hist(
  x = stepsByDay$total,
  col = "green",
  main = "Total steps per day",
  xlab = "Total steps in a day"
)

# Column summaries; the `total` column reports the mean and median per day.
summary(stepsByDay)
##          date        total      
##  2012-10-02: 1   Min.   :   41  
##  2012-10-03: 1   1st Qu.: 8841  
##  2012-10-04: 1   Median :10765  
##  2012-10-05: 1   Mean   :10766  
##  2012-10-06: 1   3rd Qu.:13294  
##  2012-10-07: 1   Max.   :21194  
##  (Other)   :47

What is the average daily activity pattern?

# Mean step count for each interval value, averaged over all days.
stepsByInterval <- aggregate(
  steps ~ interval,
  data = activity_data,
  FUN = mean
)

# Line plot of the average profile across intervals.
with(
  stepsByInterval,
  plot(interval, steps, type = 'l',
       main = "Avg steps - all days", xlab = "Interval", ylab = "Avg steps")
)

# Row index of the interval with the highest average, then that row itself.
Maximum_steps_no <- which.max(stepsByInterval$steps)

stepsByInterval[Maximum_steps_no, ]
##     interval    steps
## 104      835 206.1698

Impute missing values

# Number of missing values in the raw data.
sum(is.na(dr))
## [1] 2304
# Fill every missing step count with the mean for the same interval, taken
# from stepsByInterval. This is a single vectorised lookup: match() maps each
# missing row's interval to its row in stepsByInterval, so there is no
# row-by-row loop and no leftover `i`/`interval`/`steps` globals.
da_imputed <- dr
missing_steps <- is.na(da_imputed$steps)
interval_row <- match(
  da_imputed$interval[missing_steps],
  stepsByInterval$interval
)
da_imputed$steps[missing_steps] <- stepsByInterval$steps[interval_row]

# Daily step totals recomputed from the imputed data.
impStepByDday <- aggregate(
  steps ~ date,
  data = da_imputed,
  FUN = sum
)
head(impStepByDday, n = 6L)
##         date    steps
## 1 2012-10-01 10766.19
## 2 2012-10-02   126.00
## 3 2012-10-03 11352.00
## 4 2012-10-04 12116.00
## 5 2012-10-05 13294.00
## 6 2012-10-06 15420.00
# Distribution of the daily totals after imputation.
hist(
  x = impStepByDday$steps,
  main = "Total No. of steps / day (imputed)",
  xlab = "Total #  of steps per day"
)

# Mean and median of the daily totals with imputed values ...
mean(impStepByDday[["steps"]])
## [1] 10766.19
median(impStepByDday[["steps"]])
## [1] 10766.19
# ... and from the data without imputation, for comparison.
mean(stepsByDay[["total"]])
## [1] 10766.19
median(stepsByDay[["total"]])
## [1] 10765

Are there differences in activity patterns between weekdays and weekends?

#' Classify dates as "Weekday" or "Weekend".
#'
#' @param date_val Date(s) as `Date`, or character/factor in "%Y-%m-%d" form.
#' @return Character vector the same length as `date_val`: "Weekend" for
#'   Saturday/Sunday, "Weekday" otherwise, `NA` where the date is missing or
#'   cannot be parsed.
WeekDay <- function(date_val) {
    parsed <- as.Date(date_val, format = "%Y-%m-%d")
    # POSIXlt's wday is numeric (0 = Sunday ... 6 = Saturday), so the result
    # does not depend on the session locale the way weekdays() names do.
    day_no <- as.POSIXlt(parsed)$wday
    rtn <- rep("Weekday", length(day_no))
    rtn[day_no %in% c(0L, 6L)] <- "Weekend"
    # %in% never yields NA, so missing dates are marked explicitly.
    rtn[is.na(day_no)] <- NA_character_
    rtn
}

Showing the chart

# Label every row of the imputed data as Weekday/Weekend. Dates are passed as
# character so WeekDay() receives one plain date string per call, and vapply()
# guarantees a character result.
da_imputed$day_type <- as.factor(
  vapply(as.character(da_imputed$date), WeekDay, character(1),
         USE.NAMES = FALSE)
)
# da_imputed was copied from dr, so the rows line up; keep dr$day_type as before.
dr$day_type <- da_imputed$day_type


# Mean steps per interval and day type. Computed from the imputed data so the
# filled-in values are used; aggregating dr would drop its NA rows instead.
stepsPerDayImpute <- aggregate(steps ~ interval + day_type, da_imputed, mean)


# One panel per day type, each showing the average steps across intervals.
chrt <- ggplot(stepsPerDayImpute, aes(interval, steps)) +
    geom_line(stat = "identity", aes(colour = day_type)) +
    theme_gray() +
    facet_grid(day_type ~ ., scales="fixed", space="fixed") +
    labs(x="Interval", y=expression("No of Steps")) +
    ggtitle("# of steps/Interval by daytype")

print(chrt)