---
title: "Reproducible Research: Peer Assignment 1"
author: "Mohamed Rizwan"
date: "March 15, 2019"
output: html_document
---

## Loading libraries
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Extract the activity archive into the working directory and load the CSV.
unzip(zipfile = "repdata_data_activity.zip")
act <- read.csv(file = "activity.csv")

# Convert the date column to Date class so it can be used for date
# arithmetic (e.g. weekdays()) later in the analysis.
act[["date"]] <- as.Date(act[["date"]])

# Preview the first rows of the loaded data.
head(x = act)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
# Total steps per day. sum() without na.rm yields NA for any day that
# contains a missing reading, and na.omit() then drops those days.
by_date <- act %>%
  select(date, steps) %>%
  group_by(date) %>%
  summarize(totalsteps = sum(steps)) %>%
  na.omit()

# Distribution of the daily totals.
hist(
  by_date$totalsteps,
  breaks = 20,
  main = "Histogram of Total number of steps taken each day",
  xlab = "Total number of steps taken each day"
)

# Five-number summary plus mean of the daily totals.
summary(by_date)
## date totalsteps
## Min. :2012-10-02 Min. : 41
## 1st Qu.:2012-10-16 1st Qu.: 8841
## Median :2012-10-29 Median :10765
## Mean :2012-10-30 Mean :10766
## 3rd Qu.:2012-11-16 3rd Qu.:13294
## Max. :2012-11-29 Max. :21194
library(ggplot2)

# Average number of steps for each 5-minute interval, computed over the
# rows that have no missing values.
by_interval <- act %>%
  select(interval, steps) %>%
  na.omit() %>%
  group_by(interval) %>%
  summarize(averagesteps = mean(steps))

# Line plot of the average daily activity pattern.
ggplot(data = by_interval, mapping = aes(x = interval, y = averagesteps)) +
  geom_line()

# Row(s) of by_interval holding the highest average step count.
by_interval[with(by_interval, which(averagesteps == max(averagesteps))), ]
## # A tibble: 1 x 2
## interval averagesteps
## <int> <dbl>
## 1 835 206.
# Count every NA cell in the data frame: is.na() on a data frame returns a
# logical matrix, and which() lists the positions that are TRUE.
missingvals <- length(which(is.na(act)))
print(missingvals)
## [1] 2304
# Replace the NA entries of a numeric vector with the mean of its
# non-missing entries and return the filled vector.
replacewithmean <- function(x) {
  gaps <- is.na(x)
  x[gaps] <- mean(x, na.rm = TRUE)
  x
}
# Impute missing step counts with the mean of the same 5-minute interval.
# The result stays grouped by interval (no ungroup() is applied here).
meandata <- mutate(
  group_by(act, interval),
  steps = replacewithmean(steps)
)

# Preview the imputed data.
head(x = meandata)
## # A tibble: 6 x 3
## # Groups: interval [6]
## steps date interval
## <dbl> <date> <int>
## 1 1.72 2012-10-01 0
## 2 0.340 2012-10-01 5
## 3 0.132 2012-10-01 10
## 4 0.151 2012-10-01 15
## 5 0.0755 2012-10-01 20
## 6 2.09 2012-10-01 25
# Total steps per day after imputation.
# meandata is still grouped by interval from the imputation step, and
# select() on a grouped tibble silently re-adds the grouping column (with an
# "Adding missing grouping variables" message). Drop the grouping first so
# only date and steps are carried into the per-day aggregation.
by_date2 <- meandata %>%
  ungroup() %>%
  select(date, steps) %>%
  group_by(date) %>%
  summarize(totalsteps = sum(steps))

# Five-number summary plus mean of the imputed daily totals.
summary(by_date2)
## date totalsteps
## Min. :2012-10-01 Min. : 41
## 1st Qu.:2012-10-16 1st Qu.: 9819
## Median :2012-10-31 Median :10766
## Mean :2012-10-31 Mean :10766
## 3rd Qu.:2012-11-15 3rd Qu.:12811
## Max. :2012-11-30 Max. :21194
# Distribution of the daily totals after imputation.
hist(
  by_date2$totalsteps,
  breaks = 20,
  main = "Histogram of Total number of steps taken each day",
  xlab = "Total number of steps taken each day"
)

# Mean daily total before (mean_1) and after (mean_2) imputation, and the
# change that imputation introduced.
mean_1 <- mean(by_date$totalsteps, na.rm = TRUE)
mean_2 <- mean(by_date2$totalsteps)
difference <- mean_2 - mean_1
print(difference)
## [1] 0
# Day name for each observation (text depends on the session locale, so it is
# kept for display only and is not used for the weekend test below).
meandata$day <- weekdays(meandata$date)

# Classify each observation as weekend or weekday. POSIXlt's wday field is a
# locale-independent day-of-week number (0 = Sunday, 6 = Saturday), so this
# works regardless of the language weekdays() returns names in.
meandata$weekend <- ifelse(
  as.POSIXlt(meandata$date)$wday %in% c(0L, 6L),
  "Weekend",
  "Weekday"
)

# Average steps per 5-minute interval, separately for weekdays and weekends.
# steps has no missing values at this point (they were imputed), so mean is
# passed directly as the aggregation function.
mean_weekend_weekday <- aggregate(
  meandata$steps,
  by = list(meandata$weekend, meandata$interval),
  FUN = mean
)
names(mean_weekend_weekday) <- c("weekend", "interval", "steps")

# Preview the aggregated result.
head(mean_weekend_weekday)
## weekend interval steps
## 1 Weekday 0 2.25115304
## 2 Weekend 0 0.21462264
## 3 Weekday 5 0.44528302
## 4 Weekend 5 0.04245283
## 5 Weekday 10 0.17316562
## 6 Weekend 10 0.01650943
# Panel plot comparing the average activity pattern on weekdays and weekends,
# one facet row per day type.
ggplot(
  data = mean_weekend_weekday,
  mapping = aes(x = interval, y = steps, color = weekend)
) +
  geom_line() +
  facet_grid(weekend ~ .) +
  labs(
    x = "5 min-Interval",
    y = "Average number of the Steps taken",
    title = "Comparison of average number of steps in each interval"
  )