With the advancement of technology, it is now possible to collect a large amount of data about personal movement using activity monitoring devices such as a Fitbit, Nike Fuelband, or Jawbone Up. These types of devices are part of the “quantified self” movement – a group of enthusiasts who take measurements about themselves regularly to improve their health, to find patterns in their behavior, or because they are tech geeks. But these data remain under-utilized both because the raw data are hard to obtain and because there is a lack of statistical methods and software for processing and interpreting the data.
Here we use an activity data set to analyse and understand the pattern of steps taken by an individual, recorded at 5-minute intervals.
library(knitr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Set the knitr chunk option `echo` to TRUE for all chunks.
opts_chunk$set(echo = TRUE)
library(ggplot2)
1.1 Reading the dataset into R
1.2 Converting the dates into a Date class variable
# Load the raw activity log (columns: steps, date, interval).
# The file is comma-separated, so read.csv() is the matching reader;
# read.csv2() assumes a comma decimal mark, which would clash with the
# comma field separator for any non-integer value.
activitydata <- read.csv("activity.csv", header = TRUE)
# Parse the ISO-formatted date strings into Date objects.
activitydata$date <- as.Date(activitydata$date, format = "%Y-%m-%d")
head(activitydata)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
2.1 Copying the original data to another data frame for analysis.
2.2 Converting the dates into factors
2.3 Summarising the daily steps grouped by the dates.
# Work on a copy of the raw data, with the dates turned into factor levels.
testdata <- activitydata %>%
  mutate(date = as.factor(date))

# Total steps per day. sum() is called without na.rm, so a day that contains
# missing readings gets an NA total. The factor labels are parsed back into
# Date objects once the totals are computed.
grouped_dates <- testdata %>%
  group_by(date) %>%
  summarise(steps = sum(steps)) %>%
  mutate(date = as.Date(date, "%Y-%m-%d"))
head(grouped_dates)
## # A tibble: 6 x 2
## date steps
## <date> <int>
## 1 2012-10-01 NA
## 2 2012-10-02 126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
2.4 Plotting the histogram for total number of steps taken each day.
# Histogram of the daily step totals: each bar counts the days whose total
# falls in that range. The previous plot(type = "h") against the date drew
# one bar per day, which is a bar chart of the series, not a histogram.
# hist() ignores the days whose total is NA.
hist(
  grouped_dates$steps,
  breaks = 20,
  col = "steel blue",
  xlab = "Total steps per day",
  ylab = "Number of days",
  main = "Histogram for total number of steps taken each day"
)
# Mark the largest daily total on the steps axis.
abline(v = max(grouped_dates$steps, na.rm = TRUE))
3.1 Mean steps taken
3.2 Median of the steps taken
# Mean of the daily totals, skipping days whose total is NA.
mean_steps <- with(grouped_dates, mean(steps, na.rm = TRUE))
print(mean_steps)
## [1] 10766.19
# Median of the daily totals, skipping days whose total is NA.
median_steps <- with(grouped_dates, median(steps, na.rm = TRUE))
print(median_steps)
## [1] 10765
4.1 Getting the average of the steps on each interval
# Mean step count for each interval, computed over the non-missing readings.
average_steps <- testdata %>%
  filter(!is.na(steps)) %>%
  group_by(interval) %>%
  summarise(steps = mean(steps))
4.2 Time series plot of the data
# Line plot of the average step count against the interval identifier.
ggplot(average_steps, aes(x = interval, y = steps)) +
  geom_line(color = "steel blue") +
  labs(
    title = "Time series plot of the average number of steps taken",
    x = " Interval ",
    y = " Steps Taken "
  )
# Row for the interval with the highest average step count.
average_steps %>% slice(which.max(steps))
## # A tibble: 1 x 2
## interval steps
## <int> <dbl>
## 1 835 206.1698
6.1 Checking for the steps with NA values
6.2 Calculating the average of steps at 5 minute time intervals
6.3 Replacing the NA values with the corresponding average steps taken.
# Flag the rows whose step count is missing.
step_na <- is.na(testdata$steps)
# Per-interval mean of the observed step counts; the result is named by the
# interval value, so it can be indexed by the interval as a character string.
average_steps <- with(testdata, tapply(steps, interval, mean, na.rm = TRUE))
# Fill each missing reading with the mean of its own interval.
missing_keys <- as.character(testdata$interval[step_na])
testdata$steps[step_na] <- average_steps[missing_keys]
head(testdata)
## steps date interval
## 1 1.7169811 2012-10-01 0
## 2 0.3396226 2012-10-01 5
## 3 0.1320755 2012-10-01 10
## 4 0.1509434 2012-10-01 15
## 5 0.0754717 2012-10-01 20
## 6 2.0943396 2012-10-01 25
7.1 Summing the steps in the newly imputed data.
7.2 Plotting the totals after the missing values have been imputed.
# Total steps per day after imputation. The totals must be grouped by date:
# grouping by interval (as before) sums each 5-minute slot across all days,
# which is not the per-day total that the variable name and title describe.
steps_eachday <- testdata %>%
  group_by(date) %>%
  summarise(steps = sum(steps))
# Histogram of the daily totals: each bar counts the days whose total falls
# in that range (a type = "h" plot would draw one bar per row instead).
hist(
  steps_eachday$steps,
  breaks = 20,
  col = " steel blue ",
  xlab = "Total steps per day",
  ylab = "Number of days",
  main = "Histogram of the number of steps after missing values are imputed"
)
8.1 Classifying each date as a weekday or a weekend day
8.2 Plotting the data split into weekends and weekdays.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Make sure the date column is a Date before deriving the day of the week.
testdata$date <- as.Date(testdata$date, format = "%Y-%m-%d")
# POSIXlt's wday is numeric (0 = Sunday ... 6 = Saturday), so the weekend
# test does not depend on the session locale. weekdays() returns localized
# day names, and comparing them with "Saturday"/"Sunday" would label every
# day a weekday in a non-English locale.
is_weekend <- as.POSIXlt(testdata$date)$wday %in% c(0, 6)
testdata$weekdays <- ifelse(
  is_weekend,
  "Partydays(Weekend)",
  "Sloppydays(Weekdays)"
)
# Average steps per interval within each day type; ungroup so the result
# does not carry leftover grouping into later steps.
grouped_weektype <- testdata %>%
  group_by(interval, weekdays) %>%
  summarise(steps = mean(steps)) %>%
  ungroup()
# One panel per day type, stacked vertically for easy comparison.
ggplot(grouped_weektype, aes(x = interval, y = steps, color = weekdays)) +
  geom_line() +
  facet_wrap(~weekdays, ncol = 1, nrow = 2)