Code for reading in the dataset and/or processing the data
# Load the raw activity measurements from the CSV in the local data folder.
activity <- read.csv(file = "./data/activity.csv", header = TRUE)

# Keep a parsed Date copy of the date column; `activity` itself is left
# exactly as it was read.
dates <- as.Date(activity[["date"]])

# Column-by-column overview (ranges, quartiles, NA count) of the raw data.
summary(activity)
## steps date interval
## Min. : 0.00 2012-10-01: 288 Min. : 0.0
## 1st Qu.: 0.00 2012-10-02: 288 1st Qu.: 588.8
## Median : 0.00 2012-10-03: 288 Median :1177.5
## Mean : 37.38 2012-10-04: 288 Mean :1177.5
## 3rd Qu.: 12.00 2012-10-05: 288 3rd Qu.:1766.2
## Max. :806.00 2012-10-06: 288 Max. :2355.0
## NA's :2304 (Other) :15840
# Preview the first six rows of the raw data.
head(activity, n = 6L)
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
Histogram of the total number of steps taken each day
# Total steps per date. Each row of the result is one date together with the
# sum of its `steps` values.
aggregatedTotalByDay <- aggregate(
  steps ~ date,
  data = activity,
  FUN = sum,
  na.rm = TRUE
)

# Show the per-date totals.
aggregatedTotalByDay
## date steps
## 1 2012-10-02 126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
## 7 2012-10-09 12811
## 8 2012-10-10 9900
## 9 2012-10-11 10304
## 10 2012-10-12 17382
## 11 2012-10-13 12426
## 12 2012-10-14 15098
## 13 2012-10-15 10139
## 14 2012-10-16 15084
## 15 2012-10-17 13452
## 16 2012-10-18 10056
## 17 2012-10-19 11829
## 18 2012-10-20 10395
## 19 2012-10-21 8821
## 20 2012-10-22 13460
## 21 2012-10-23 8918
## 22 2012-10-24 8355
## 23 2012-10-25 2492
## 24 2012-10-26 6778
## 25 2012-10-27 10119
## 26 2012-10-28 11458
## 27 2012-10-29 5018
## 28 2012-10-30 9819
## 29 2012-10-31 15414
## 30 2012-11-02 10600
## 31 2012-11-03 10571
## 32 2012-11-05 10439
## 33 2012-11-06 8334
## 34 2012-11-07 12883
## 35 2012-11-08 3219
## 36 2012-11-11 12608
## 37 2012-11-12 10765
## 38 2012-11-13 7336
## 39 2012-11-15 41
## 40 2012-11-16 5441
## 41 2012-11-17 14339
## 42 2012-11-18 15110
## 43 2012-11-19 8841
## 44 2012-11-20 4472
## 45 2012-11-21 12787
## 46 2012-11-22 20427
## 47 2012-11-23 21194
## 48 2012-11-24 14478
## 49 2012-11-25 11834
## 50 2012-11-26 11162
## 51 2012-11-27 13646
## 52 2012-11-28 10183
## 53 2012-11-29 7047
# Histogram of the per-date step totals. An explicit x-axis label is supplied
# because the default label is the raw R expression passed as the first
# argument (`aggregatedTotalByDay$steps`), which is not reader-friendly.
hist(
  aggregatedTotalByDay$steps,
  main = "Number of Steps taken Daily",
  xlab = "Total steps per day"
)
Mean and median number of steps taken each day
# Per-date mean and median of the `steps` values.
# NOTE(review): these are statistics over the individual interval readings
# within each date, not the mean/median of the daily totals -- confirm this is
# what the report intends.
meanTotalByDay <- aggregate(steps ~ date, data = activity, FUN = mean)
medianTotalByDay <- aggregate(steps ~ date, data = activity, FUN = median)

# Show the per-date means.
meanTotalByDay
## date steps
## 1 2012-10-02 0.4375000
## 2 2012-10-03 39.4166667
## 3 2012-10-04 42.0694444
## 4 2012-10-05 46.1597222
## 5 2012-10-06 53.5416667
## 6 2012-10-07 38.2465278
## 7 2012-10-09 44.4826389
## 8 2012-10-10 34.3750000
## 9 2012-10-11 35.7777778
## 10 2012-10-12 60.3541667
## 11 2012-10-13 43.1458333
## 12 2012-10-14 52.4236111
## 13 2012-10-15 35.2048611
## 14 2012-10-16 52.3750000
## 15 2012-10-17 46.7083333
## 16 2012-10-18 34.9166667
## 17 2012-10-19 41.0729167
## 18 2012-10-20 36.0937500
## 19 2012-10-21 30.6284722
## 20 2012-10-22 46.7361111
## 21 2012-10-23 30.9652778
## 22 2012-10-24 29.0104167
## 23 2012-10-25 8.6527778
## 24 2012-10-26 23.5347222
## 25 2012-10-27 35.1354167
## 26 2012-10-28 39.7847222
## 27 2012-10-29 17.4236111
## 28 2012-10-30 34.0937500
## 29 2012-10-31 53.5208333
## 30 2012-11-02 36.8055556
## 31 2012-11-03 36.7048611
## 32 2012-11-05 36.2465278
## 33 2012-11-06 28.9375000
## 34 2012-11-07 44.7326389
## 35 2012-11-08 11.1770833
## 36 2012-11-11 43.7777778
## 37 2012-11-12 37.3784722
## 38 2012-11-13 25.4722222
## 39 2012-11-15 0.1423611
## 40 2012-11-16 18.8923611
## 41 2012-11-17 49.7881944
## 42 2012-11-18 52.4652778
## 43 2012-11-19 30.6979167
## 44 2012-11-20 15.5277778
## 45 2012-11-21 44.3993056
## 46 2012-11-22 70.9270833
## 47 2012-11-23 73.5902778
## 48 2012-11-24 50.2708333
## 49 2012-11-25 41.0902778
## 50 2012-11-26 38.7569444
## 51 2012-11-27 47.3819444
## 52 2012-11-28 35.3576389
## 53 2012-11-29 24.4687500
# Print the per-date median table stored in `medianTotalByDay`.
medianTotalByDay
## date steps
## 1 2012-10-02 0
## 2 2012-10-03 0
## 3 2012-10-04 0
## 4 2012-10-05 0
## 5 2012-10-06 0
## 6 2012-10-07 0
## 7 2012-10-09 0
## 8 2012-10-10 0
## 9 2012-10-11 0
## 10 2012-10-12 0
## 11 2012-10-13 0
## 12 2012-10-14 0
## 13 2012-10-15 0
## 14 2012-10-16 0
## 15 2012-10-17 0
## 16 2012-10-18 0
## 17 2012-10-19 0
## 18 2012-10-20 0
## 19 2012-10-21 0
## 20 2012-10-22 0
## 21 2012-10-23 0
## 22 2012-10-24 0
## 23 2012-10-25 0
## 24 2012-10-26 0
## 25 2012-10-27 0
## 26 2012-10-28 0
## 27 2012-10-29 0
## 28 2012-10-30 0
## 29 2012-10-31 0
## 30 2012-11-02 0
## 31 2012-11-03 0
## 32 2012-11-05 0
## 33 2012-11-06 0
## 34 2012-11-07 0
## 35 2012-11-08 0
## 36 2012-11-11 0
## 37 2012-11-12 0
## 38 2012-11-13 0
## 39 2012-11-15 0
## 40 2012-11-16 0
## 41 2012-11-17 0
## 42 2012-11-18 0
## 43 2012-11-19 0
## 44 2012-11-20 0
## 45 2012-11-21 0
## 46 2012-11-22 0
## 47 2012-11-23 0
## 48 2012-11-24 0
## 49 2012-11-25 0
## 50 2012-11-26 0
## 51 2012-11-27 0
## 52 2012-11-28 0
## 53 2012-11-29 0
Time series plot of the average number of steps taken
# Build a regular daily time series of the total steps per day.
#
# Only the `steps` column goes into the series: passing the whole data frame
# to `ts()` coerces the `date` column to integer codes and adds it as a
# second, meaningless series. `ts()` also assumes evenly spaced observations,
# so calendar days that are absent from `aggregatedTotalByDay` are re-inserted
# as NA instead of being silently closed up.
#
# The time index counts calendar days from the first recorded day (1 = first
# day). The earlier `start = c(2012-10-02, 1)` was evaluated as arithmetic
# (2012 - 10 - 2 = 2000), which labelled the series as starting in year 2000.
steptimeseries <- local({
  recordedDays <- as.Date(as.character(aggregatedTotalByDay$date))
  calendarDays <- seq(min(recordedDays), max(recordedDays), by = "day")
  # Compare as ISO date strings so the lookup is exact.
  position <- match(as.character(calendarDays), as.character(recordedDays))
  ts(aggregatedTotalByDay$steps[position], start = 1, frequency = 1)
})

# NOTE: any printed output recorded below this statement predates this fix
# and has to be regenerated.
steptimeseries
## Time Series:
## Start = c(2000, 1)
## End = c(2005, 3)
## Frequency = 10
## date steps
## 2000.0 2 126
## 2000.1 3 11352
## 2000.2 4 12116
## 2000.3 5 13294
## 2000.4 6 15420
## 2000.5 7 11015
## 2000.6 9 12811
## 2000.7 10 9900
## 2000.8 11 10304
## 2000.9 12 17382
## 2001.0 13 12426
## 2001.1 14 15098
## 2001.2 15 10139
## 2001.3 16 15084
## 2001.4 17 13452
## 2001.5 18 10056
## 2001.6 19 11829
## 2001.7 20 10395
## 2001.8 21 8821
## 2001.9 22 13460
## 2002.0 23 8918
## 2002.1 24 8355
## 2002.2 25 2492
## 2002.3 26 6778
## 2002.4 27 10119
## 2002.5 28 11458
## 2002.6 29 5018
## 2002.7 30 9819
## 2002.8 31 15414
## 2002.9 33 10600
## 2003.0 34 10571
## 2003.1 36 10439
## 2003.2 37 8334
## 2003.3 38 12883
## 2003.4 39 3219
## 2003.5 42 12608
## 2003.6 43 10765
## 2003.7 44 7336
## 2003.8 46 41
## 2003.9 47 5441
## 2004.0 48 14339
## 2004.1 49 15110
## 2004.2 50 8841
## 2004.3 51 4472
## 2004.4 52 12787
## 2004.5 53 20427
## 2004.6 54 21194
## 2004.7 55 14478
## 2004.8 56 11834
## 2004.9 57 11162
## 2005.0 58 13646
## 2005.1 59 10183
## 2005.2 60 7047
# Draw the series held in `steptimeseries` against its time index.
plot.ts(steptimeseries)
The 5-minute interval that, on average, contains the maximum number of steps
Code to describe and show a strategy for imputing missing data
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Tabulate the missing-value patterns in `activity` (md.pattern from mice);
# in the recorded output, `steps` is the only column with missing values.
md.pattern(activity)
## date interval steps
## 15264 1 1 1 0
## 2304 1 1 0 1
## 0 0 2304 2304
Histogram of the total number of steps taken each day after missing values are imputed
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Plot the share of missing values per variable and the combinations in which
# they occur (aggr from VIM); the returned object is kept in `aggr_plot`.
# NOTE(review): this visualises missingness only -- no imputation step is
# visible in this section; confirm against the section heading.
aggr_plot <- aggr(
  activity,
  col = c("navyblue", "red"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(activity),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Histogram of missing data", "Pattern")
)
##
## Variables sorted by number of missings:
## Variable Count
## steps 0.1311475
## date 0.0000000
## interval 0.0000000
Panel plot comparing the average number of steps taken per 5-minute interval across weekdays and weekends
All of the R code needed to reproduce the results (numbers, plots, etc.) in the report