# Load the activity measurements from the current working directory.
rep <- read.csv("activity.csv")

# Store `date` as plain text (in case read.csv delivered it as a factor) and
# keep a parsed Date version next to it in the new `dates` column.
rep[["date"]] <- as.character(rep[["date"]])
rep[["dates"]] <- as.Date(rep[["date"]], format = "%Y-%m-%d")

# Complete observations only: drop every row that contains an NA.
data <- na.omit(rep)

# Histogram of the total number of steps taken each day
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
# Total number of steps per day, computed from the NA-free `data`.
# Summing `rep` with na.rm = TRUE would turn every day without any recorded
# measurement into a 0-step day and drag the mean and median down, so days
# without data are left out instead.
stepsByDay <- tapply(data$steps, data$date, sum)
# Histogram of the daily totals (distribution of steps per day).
hist(stepsByDay, main = "Total Steps per Day", xlab = "Steps per day")

#Mean and Median number of steps taken each day
mean(stepsByDay)
## [1] 10766.19
# answer = 10766.19
median(stepsByDay)
## [1] 10765
# answer = 10765

#Time series plot of the average number of steps taken
# stat_summary() reduces all step counts sharing a date to their mean and
# draws one bar per day. `fun` is the current name of the summary-function
# argument; the former `fun.y` is deprecated since ggplot2 3.3.0.
meanplot <- ggplot(data = data, aes(dates, steps)) +
    stat_summary(fun = mean, geom = "bar")
# Printing the plot object with a title added renders the figure.
meanplot + ggtitle("Mean Steps per Day")

#The 5-minute interval that, on average, contains the maximum number of steps
# Mean steps for each interval across all days; `data` is already NA-free,
# so no na.rm handling is needed here.
stepsByInterval <- tapply(data$steps, data$interval, mean)
summary(stepsByInterval)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.486  34.110  37.380  52.830 206.200
# which.max() locates the largest mean directly; subsetting by that position
# keeps the interval label as the element name in the printed result.
stepsByInterval[which.max(stepsByInterval)]
##      835 
## 206.1698
#answer is interval 835

#Code to describe and show a strategy for imputing missing data
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Strategy: na.aggregate() with its default settings replaces every missing
# step count with the mean of all observed step counts (37.38 in the summary
# below). Note that this overwrites rep$steps in place.
rep$steps <- na.aggregate(rep$steps)
summary(rep)
##      steps            date              interval          dates           
##  Min.   :  0.00   Length:17568       Min.   :   0.0   Min.   :2012-10-01  
##  1st Qu.:  0.00   Class :character   1st Qu.: 588.8   1st Qu.:2012-10-16  
##  Median :  0.00   Mode  :character   Median :1177.5   Median :2012-10-31  
##  Mean   : 37.38                      Mean   :1177.5   Mean   :2012-10-31  
##  3rd Qu.: 37.38                      3rd Qu.:1766.2   3rd Qu.:2012-11-15  
##  Max.   :806.00                      Max.   :2355.0   Max.   :2012-11-30
#Histogram of the total number of steps taken each day after missing values are imputed
# No NA is left after imputation, so the daily sums need no na.rm handling.
stepsByDayNA <- tapply(rep$steps, rep$date, sum)
hist(stepsByDayNA, main = "Total Steps per Day (imputed)", xlab = "Steps per day")

#Panel plot comparing the average number of steps taken per 5-minute interval across weekdays and weekends
# Human-readable day name, kept for inspection only (its text depends on the
# session locale).
rep$week <- weekdays(rep$dates)
# Classify through the numeric day of the week (0 = Sunday, 6 = Saturday)
# instead of matching English day names: weekdays() is translated by the
# locale, so a name comparison would label every row "weekday" elsewhere.
dayNumber <- as.POSIXlt(rep$dates)$wday
rep <- mutate(rep, weekpart = ifelse(dayNumber == 0 | dayNumber == 6, "weekend", "weekday"))
str(rep)
## 'data.frame':    17568 obs. of  6 variables:
##  $ steps   : num  37.4 37.4 37.4 37.4 37.4 ...
##  $ date    : chr  "2012-10-01" "2012-10-01" "2012-10-01" "2012-10-01" ...
##  $ interval: int  0 5 10 15 20 25 30 35 40 45 ...
##  $ dates   : Date, format: "2012-10-01" "2012-10-01" ...
##  $ week    : chr  "Monday" "Monday" "Monday" "Monday" ...
##  $ weekpart: chr  "weekday" "weekday" "weekday" "weekday" ...
# A factor is used so that facet_grid() below gets one panel per level.
rep$weekpart <- as.factor(rep$weekpart)

# Mean steps for every interval, separately for weekdays and weekends.
byDay <- aggregate(steps ~ interval + weekpart, data = rep, mean)
head(byDay)
##   interval weekpart    steps
## 1        0  weekday 7.006569
## 2        5  weekday 5.384347
## 3       10  weekday 5.139902
## 4       15  weekday 5.162124
## 5       20  weekday 5.073235
## 6       25  weekday 6.295458
# One line panel per weekpart level, stacked vertically.
ggplot(byDay, aes(interval, steps)) + 
    geom_line() + 
    facet_grid(weekpart ~ .) +
    xlab("5-minute Interval") + 
    ylab("Mean Steps")