This data is taken from a Coursera assignment. It comes from activity monitoring devices such as Fitbit, Nike Fuelband, or Jawbone Up. The device collects data at 5-minute intervals throughout the day. The dataset consists of two months of data from an anonymous individual, collected during October and November 2012, and includes the number of steps taken in each 5-minute interval each day.
library(ggplot2)
library(dplyr)
library(lubridate)
# Load the raw activity log and convert the date column to Date objects.
data <- read.csv("./activity.csv")
data[["date"]] <- as.Date(data[["date"]])

# Total steps per day; missing readings are dropped before summing.
stepSumPerDay <- data %>%
  group_by(date) %>%
  summarise(stepSum = sum(steps, na.rm = TRUE))

# Histogram of the daily totals.
plot1 <- ggplot(stepSumPerDay, aes(x = stepSum)) +
  geom_histogram(bins = 50) +
  labs(x = "Total Steps Taken per Day", y = "Frequency")
print(plot1)

# Mean and median of the daily totals.
stepMean <- mean(stepSumPerDay[["stepSum"]])
stepMedian <- median(stepSumPerDay[["stepSum"]])
# Mean number of steps for each interval, averaged over all days
# (missing readings are ignored).
stepSumPerInterval <- data %>%
  group_by(interval) %>%
  summarise(stepMeanInterval = mean(steps, na.rm = TRUE))

# Line chart of the average steps across the intervals.
plot2 <- ggplot(stepSumPerInterval, aes(x = interval, y = stepMeanInterval)) +
  geom_line() +
  labs(x = "Interval", y = "Average Step")
print(plot2)

# Row index of the highest average, and the interval found on that row
# (kept as a one-row, one-column table, as `[row, column]` returns it).
maximumStep <- which.max(stepSumPerInterval[["stepMeanInterval"]])
intervalMax <- stepSumPerInterval[maximumStep, "interval"]
The maximum average number of steps occurred at the 8:35 interval.
# Count the missing step readings (each TRUE from is.na() counts as 1).
totalNA <- sum(is.na(data$steps))
Total number of NAs: 2304
# Mean steps per interval, used below as the fill-in value for missing
# readings.
stepSumPerInterval <- data %>%
  group_by(interval) %>%
  summarise(stepMeanInterval = mean(steps, na.rm = TRUE))

# Attach each row's interval mean (merge joins on the shared `interval`
# column), then put the rows back in date order.
data_2 <- merge(data, stepSumPerInterval)
data_2 <- data_2[order(data_2$date), ]

# Replace every missing step count with the mean for its interval.
data_2$steps <- ifelse(
  is.na(data_2$steps),
  data_2$stepMeanInterval,
  data_2$steps
)

# Keep only the original three columns, dropping the helper mean column.
data_2 <- data_2[, c("steps", "date", "interval")]

# Daily totals and histogram for the imputed data.
stepSumPerDay_2 <- data_2 %>%
  group_by(date) %>%
  summarise(stepSum = sum(steps, na.rm = TRUE))
plot3 <- ggplot(stepSumPerDay_2, aes(x = stepSum)) +
  geom_histogram(bins = 50) +
  labs(x = "Total Steps Taken per Day", y = "Frequency")
print(plot3)
Noticeably, the chart looks more “bell-shaped” compared to the first chart.
# Daily step totals from the imputed data, followed by their mean and median.
stepSumPerDay_2 <- data_2 %>%
  group_by(date) %>%
  summarise(stepSum = sum(steps, na.rm = TRUE))
stepMean_2 <- mean(stepSumPerDay_2[["stepSum"]])
stepMedian_2 <- median(stepSumPerDay_2[["stepSum"]])
# Day-of-week number for every observation. With lubridate's default week
# start, wday() returns 1 for Sunday through 7 for Saturday.
# NOTE(review): this relies on the `lubridate.week.start` option being left
# at its default - confirm if the option is set elsewhere.
data_2$weekday <- wday(data_2$date)

# Label each row in one vectorised step. The previous code first assigned
# "weekdays" using `weekday > 1 | weekday < 7`, a condition that is TRUE for
# every day, and only produced correct labels because the weekend rows were
# overwritten afterwards.
data_2$daytype <- ifelse(
  data_2$weekday == 1 | data_2$weekday == 7,
  "weekends",
  "weekdays"
)

# Mean steps per interval, computed separately for weekdays and weekends.
# ungroup() removes the leftover `daytype` grouping from the summary.
stepSumPerInterval_2 <- data_2 %>%
  group_by(daytype, interval) %>%
  summarise(stepMeanInterval = mean(steps, na.rm = TRUE)) %>%
  ungroup()

# One panel per day type, each with its own y-axis scale.
plot4 <- ggplot(stepSumPerInterval_2, aes(x = interval, y = stepMeanInterval)) +
  geom_line() +
  facet_wrap(~daytype, nrow = 2, scales = "free_y") +
  labs(x = "Interval", y = "Average Steps")
print(plot4)
The graph shows the difference in average step patterns between weekdays and weekends.