I. Loading and preprocessing the data

# Load the raw activity data from a CSV file picked interactively.
# file.choose() is used because choose.files() only exists on Windows builds
# of R, so the script previously failed on other platforms.
# The former rm(list = ls()) call was dropped: a script should not wipe the
# caller's workspace as a side effect.
activity_raw <- read.csv(file.choose(), stringsAsFactors = FALSE)
# Parse the date strings (year-month-day) into POSIXct values
activity_raw <- transform(
  activity_raw,
  date = as.POSIXct(date, format = "%Y-%m-%d")
)

# Rebuild the table with a lower-cased weekday name derived from each date.
# weekdays() yields names in the session's locale.
activity_raw <- with(
  activity_raw,
  data.frame(
    date = date,
    weekday = tolower(weekdays(date)),
    steps = steps,
    interval = interval
  )
)

# Compute the day type (weekend or weekday).
# The day of the week is read numerically from the date (POSIXlt$wday:
# 0 = Sunday, 6 = Saturday) rather than by comparing the weekday *name* with
# English strings. weekdays() is locale-dependent, so under a non-English
# locale (e.g. "sábado" / "domingo") the name comparison never matched and
# every row was labelled "weekday".
# `==` (not %in%) is used so that an unparsable (NA) date still yields NA.
# The result is stored as a factor with a fixed level order.
activity_raw$daytype <- local({
  wday <- as.POSIXlt(activity_raw$date)$wday
  factor(
    ifelse(wday == 0L | wday == 6L, "weekend", "weekday"),
    levels = c("weekday", "weekend")
  )
})

# Assemble the analysis table: same rows as activity_raw, with the columns
# arranged in their final order
activity <- activity_raw[, c("date", "weekday", "daytype", "interval", "steps")]

head(activity)
##         date weekday daytype interval steps
## 1 2012-10-01   lunes weekday        0    NA
## 2 2012-10-01   lunes weekday        5    NA
## 3 2012-10-01   lunes weekday       10    NA
## 4 2012-10-01   lunes weekday       15    NA
## 5 2012-10-01   lunes weekday       20    NA
## 6 2012-10-01   lunes weekday       25    NA
  1. What is the mean total number of steps taken per day?
# Daily totals: add up the steps recorded on each date, skipping missing
# readings. A date whose readings are all NA therefore gets a total of 0.
sum_data <- with(
  activity,
  aggregate(steps, by = list(date), FUN = sum, na.rm = TRUE)
)
# Give the two columns descriptive names
colnames(sum_data) <- c("date", "total")
head(sum_data)
##         date total
## 1 2012-10-01     0
## 2 2012-10-02   126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
# Distribution of the daily totals, in bins of 2500 steps
hist(
  sum_data$total,
  main = "Histogram of the total number of steps taken each day\n(NA removed)",
  xlab = "Total number of steps",
  col = "blue",
  breaks = seq(from = 0, to = 25000, by = 2500),
  ylim = c(0, 20)
)

# Mean and median of the daily totals
mean(sum_data[["total"]])
## [1] 9354.23
median(sum_data[["total"]])
## [1] 10395
  2. What is the average daily activity pattern?
# The daily totals are no longer needed
rm(sum_data)

# Average the step counts across all days for each interval value,
# skipping missing readings
mean_data <- with(
  activity,
  aggregate(steps, by = list(interval), FUN = mean, na.rm = TRUE)
)
mean_data
##     Group.1           x
## 1         0   1.7169811
## 2         5   0.3396226
## 3        10   0.1320755
## 4        15   0.1509434
## 5        20   0.0754717
## 6        25   2.0943396
## 7        30   0.5283019
## 8        35   0.8679245
## 9        40   0.0000000
## 10       45   1.4716981
## 11       50   0.3018868
## 12       55   0.1320755
## 13      100   0.3207547
## 14      105   0.6792453
## 15      110   0.1509434
## 16      115   0.3396226
## 17      120   0.0000000
## 18      125   1.1132075
## 19      130   1.8301887
## 20      135   0.1698113
## 21      140   0.1698113
## 22      145   0.3773585
## 23      150   0.2641509
## 24      155   0.0000000
## 25      200   0.0000000
## 26      205   0.0000000
## 27      210   1.1320755
## 28      215   0.0000000
## 29      220   0.0000000
## 30      225   0.1320755
## 31      230   0.0000000
## 32      235   0.2264151
## 33      240   0.0000000
## 34      245   0.0000000
## 35      250   1.5471698
## 36      255   0.9433962
## 37      300   0.0000000
## 38      305   0.0000000
## 39      310   0.0000000
## 40      315   0.0000000
## 41      320   0.2075472
## 42      325   0.6226415
## 43      330   1.6226415
## 44      335   0.5849057
## 45      340   0.4905660
## 46      345   0.0754717
## 47      350   0.0000000
## 48      355   0.0000000
## 49      400   1.1886792
## 50      405   0.9433962
## 51      410   2.5660377
## 52      415   0.0000000
## 53      420   0.3396226
## 54      425   0.3584906
## 55      430   4.1132075
## 56      435   0.6603774
## 57      440   3.4905660
## 58      445   0.8301887
## 59      450   3.1132075
## 60      455   1.1132075
## 61      500   0.0000000
## 62      505   1.5660377
## 63      510   3.0000000
## 64      515   2.2452830
## 65      520   3.3207547
## 66      525   2.9622642
## 67      530   2.0943396
## 68      535   6.0566038
## 69      540  16.0188679
## 70      545  18.3396226
## 71      550  39.4528302
## 72      555  44.4905660
## 73      600  31.4905660
## 74      605  49.2641509
## 75      610  53.7735849
## 76      615  63.4528302
## 77      620  49.9622642
## 78      625  47.0754717
## 79      630  52.1509434
## 80      635  39.3396226
## 81      640  44.0188679
## 82      645  44.1698113
## 83      650  37.3584906
## 84      655  49.0377358
## 85      700  43.8113208
## 86      705  44.3773585
## 87      710  50.5094340
## 88      715  54.5094340
## 89      720  49.9245283
## 90      725  50.9811321
## 91      730  55.6792453
## 92      735  44.3207547
## 93      740  52.2641509
## 94      745  69.5471698
## 95      750  57.8490566
## 96      755  56.1509434
## 97      800  73.3773585
## 98      805  68.2075472
## 99      810 129.4339623
## 100     815 157.5283019
## 101     820 171.1509434
## 102     825 155.3962264
## 103     830 177.3018868
## 104     835 206.1698113
## 105     840 195.9245283
## 106     845 179.5660377
## 107     850 183.3962264
## 108     855 167.0188679
## 109     900 143.4528302
## 110     905 124.0377358
## 111     910 109.1132075
## 112     915 108.1132075
## 113     920 103.7169811
## 114     925  95.9622642
## 115     930  66.2075472
## 116     935  45.2264151
## 117     940  24.7924528
## 118     945  38.7547170
## 119     950  34.9811321
## 120     955  21.0566038
## 121    1000  40.5660377
## 122    1005  26.9811321
## 123    1010  42.4150943
## 124    1015  52.6603774
## 125    1020  38.9245283
## 126    1025  50.7924528
## 127    1030  44.2830189
## 128    1035  37.4150943
## 129    1040  34.6981132
## 130    1045  28.3396226
## 131    1050  25.0943396
## 132    1055  31.9433962
## 133    1100  31.3584906
## 134    1105  29.6792453
## 135    1110  21.3207547
## 136    1115  25.5471698
## 137    1120  28.3773585
## 138    1125  26.4716981
## 139    1130  33.4339623
## 140    1135  49.9811321
## 141    1140  42.0377358
## 142    1145  44.6037736
## 143    1150  46.0377358
## 144    1155  59.1886792
## 145    1200  63.8679245
## 146    1205  87.6981132
## 147    1210  94.8490566
## 148    1215  92.7735849
## 149    1220  63.3962264
## 150    1225  50.1698113
## 151    1230  54.4716981
## 152    1235  32.4150943
## 153    1240  26.5283019
## 154    1245  37.7358491
## 155    1250  45.0566038
## 156    1255  67.2830189
## 157    1300  42.3396226
## 158    1305  39.8867925
## 159    1310  43.2641509
## 160    1315  40.9811321
## 161    1320  46.2452830
## 162    1325  56.4339623
## 163    1330  42.7547170
## 164    1335  25.1320755
## 165    1340  39.9622642
## 166    1345  53.5471698
## 167    1350  47.3207547
## 168    1355  60.8113208
## 169    1400  55.7547170
## 170    1405  51.9622642
## 171    1410  43.5849057
## 172    1415  48.6981132
## 173    1420  35.4716981
## 174    1425  37.5471698
## 175    1430  41.8490566
## 176    1435  27.5094340
## 177    1440  17.1132075
## 178    1445  26.0754717
## 179    1450  43.6226415
## 180    1455  43.7735849
## 181    1500  30.0188679
## 182    1505  36.0754717
## 183    1510  35.4905660
## 184    1515  38.8490566
## 185    1520  45.9622642
## 186    1525  47.7547170
## 187    1530  48.1320755
## 188    1535  65.3207547
## 189    1540  82.9056604
## 190    1545  98.6603774
## 191    1550 102.1132075
## 192    1555  83.9622642
## 193    1600  62.1320755
## 194    1605  64.1320755
## 195    1610  74.5471698
## 196    1615  63.1698113
## 197    1620  56.9056604
## 198    1625  59.7735849
## 199    1630  43.8679245
## 200    1635  38.5660377
## 201    1640  44.6603774
## 202    1645  45.4528302
## 203    1650  46.2075472
## 204    1655  43.6792453
## 205    1700  46.6226415
## 206    1705  56.3018868
## 207    1710  50.7169811
## 208    1715  61.2264151
## 209    1720  72.7169811
## 210    1725  78.9433962
## 211    1730  68.9433962
## 212    1735  59.6603774
## 213    1740  75.0943396
## 214    1745  56.5094340
## 215    1750  34.7735849
## 216    1755  37.4528302
## 217    1800  40.6792453
## 218    1805  58.0188679
## 219    1810  74.6981132
## 220    1815  85.3207547
## 221    1820  59.2641509
## 222    1825  67.7735849
## 223    1830  77.6981132
## 224    1835  74.2452830
## 225    1840  85.3396226
## 226    1845  99.4528302
## 227    1850  86.5849057
## 228    1855  85.6037736
## 229    1900  84.8679245
## 230    1905  77.8301887
## 231    1910  58.0377358
## 232    1915  53.3584906
## 233    1920  36.3207547
## 234    1925  20.7169811
## 235    1930  27.3962264
## 236    1935  40.0188679
## 237    1940  30.2075472
## 238    1945  25.5471698
## 239    1950  45.6603774
## 240    1955  33.5283019
## 241    2000  19.6226415
## 242    2005  19.0188679
## 243    2010  19.3396226
## 244    2015  33.3396226
## 245    2020  26.8113208
## 246    2025  21.1698113
## 247    2030  27.3018868
## 248    2035  21.3396226
## 249    2040  19.5471698
## 250    2045  21.3207547
## 251    2050  32.3018868
## 252    2055  20.1509434
## 253    2100  15.9433962
## 254    2105  17.2264151
## 255    2110  23.4528302
## 256    2115  19.2452830
## 257    2120  12.4528302
## 258    2125   8.0188679
## 259    2130  14.6603774
## 260    2135  16.3018868
## 261    2140   8.6792453
## 262    2145   7.7924528
## 263    2150   8.1320755
## 264    2155   2.6226415
## 265    2200   1.4528302
## 266    2205   3.6792453
## 267    2210   4.8113208
## 268    2215   8.5094340
## 269    2220   7.0754717
## 270    2225   8.6981132
## 271    2230   9.7547170
## 272    2235   2.2075472
## 273    2240   0.3207547
## 274    2245   0.1132075
## 275    2250   1.6037736
## 276    2255   4.6037736
## 277    2300   3.3018868
## 278    2305   2.8490566
## 279    2310   0.0000000
## 280    2315   0.8301887
## 281    2320   0.9622642
## 282    2325   1.5849057
## 283    2330   2.6037736
## 284    2335   4.6981132
## 285    2340   3.3018868
## 286    2345   0.6415094
## 287    2350   0.2264151
## 288    2355   1.0754717
# Replace the default column names produced by aggregate() with descriptive ones
colnames(mean_data) <- c("interval", "mean")

head(mean_data, n = 6L)
##   interval      mean
## 1        0 1.7169811
## 2        5 0.3396226
## 3       10 0.1320755
## 4       15 0.1509434
## 5       20 0.0754717
## 6       25 2.0943396
# Time-series plot of the average number of steps per interval.
# The interval column is an HHMM-style identifier (it jumps from 55 to 100
# and ends at 2355), not a count of minutes. Plotting it raw leaves a gap of
# 45 units at every hour boundary and contradicts the "[minutes]" axis label,
# so it is converted to minutes since midnight first.
plot((mean_data$interval %/% 100) * 60 + mean_data$interval %% 100,
     mean_data$mean,
     type = "l",
     col = "blue",
     lwd = 2,
     xlab = "Interval [minutes]",
     ylab = "Average number of steps",
     main = "Time-series of the average number of steps per intervals\n(NA removed)")

# Row position(s) at which the per-interval mean reaches its maximum
max_pos <- with(mean_data, which(mean == max(mean)))

max_pos
## [1] 104
# Read the interval identifier (first column) stored at that position
max_interval <- mean_data[[1L]][max_pos]

max_interval
## [1] 835
# Drop the objects that are no longer needed
rm(list = c("max_pos", "mean_data"))

Imputing the missing values

# Clear the workspace
rm(max_interval)

# Count the missing step readings. is.na() gives a logical vector and sum()
# counts each TRUE as 1.
NA_count <- sum(is.na(activity$steps))

# Report the count: it was previously computed and then removed without ever
# being shown.
NA_count

# Clear the workspace
rm(NA_count)

# Impute the missing readings: every NA is replaced by the overall mean of
# the observed step counts. The scalar mean is recycled over the NA
# positions, so no helper vectors are needed (or left behind to clean up).
activity$steps[is.na(activity$steps)] <- mean(activity$steps, na.rm = TRUE)

head(activity)
##         date weekday daytype interval   steps
## 1 2012-10-01   lunes weekday        0 37.3826
## 2 2012-10-01   lunes weekday        5 37.3826
## 3 2012-10-01   lunes weekday       10 37.3826
## 4 2012-10-01   lunes weekday       15 37.3826
## 5 2012-10-01   lunes weekday       20 37.3826
## 6 2012-10-01   lunes weekday       25 37.3826
# Daily totals again, this time on the data whose missing readings were
# replaced by the mean (so no na.rm is passed to sum)
sum_data <- with(activity, aggregate(steps, by = list(date), FUN = sum))

# Give the two columns descriptive names
colnames(sum_data) <- c("date", "total")

# Distribution of the daily totals, in bins of 2500 steps
hist(
  sum_data$total,
  main = "Histogram of the total number of steps taken each day\n(NA replaced by mean value)",
  xlab = "Total number of steps",
  col = "blue",
  breaks = seq(from = 0, to = 25000, by = 2500),
  ylim = c(0, 30)
)

# Mean and median of the daily totals
mean(sum_data[["total"]])
## [1] 10766.19
median(sum_data[["total"]])
## [1] 10766.19

Are there differences in activity patterns between weekdays and weekends?

# The day-type column "daytype" is already part of the activity data frame
head(activity)
##         date weekday daytype interval   steps
## 1 2012-10-01   lunes weekday        0 37.3826
## 2 2012-10-01   lunes weekday        5 37.3826
## 3 2012-10-01   lunes weekday       10 37.3826
## 4 2012-10-01   lunes weekday       15 37.3826
## 5 2012-10-01   lunes weekday       20 37.3826
## 6 2012-10-01   lunes weekday       25 37.3826
# Load the lattice graphical library.
# library() stops with an error when the package is missing; require() only
# returned FALSE, so the failure would have surfaced later at the xyplot() call.
library(lattice)
# Compute the average number of steps for every combination of day type,
# weekday and interval
mean_data <- aggregate(activity$steps,
                       by = list(activity$daytype,
                                 activity$weekday,
                                 activity$interval),
                       FUN = mean)

# Rename the attributes
names(mean_data) <- c("daytype", "weekday", "interval", "mean")
head(mean_data)
##   daytype   weekday interval     mean
## 1 weekday   domingo        0 4.672825
## 2 weekday    jueves        0 9.375844
## 3 weekday     lunes        0 9.418355
## 4 weekday    martes        0 0.000000
## 5 weekday miércoles        0 7.931400
## 6 weekday    sábado        0 4.672825
### The time series plot takes the following form:

# Time-series panel plot: average number of steps per interval, one panel per
# day type.
# The averages are computed here directly from the observations, grouped by
# interval and day type only. mean_data holds one mean per weekday as well, so
# plotting it put several points on every interval within a panel and
# type = "l" drew a zig-zag through them instead of a single averaged line.
xyplot(steps ~ interval | daytype,
       data = aggregate(steps ~ interval + daytype, data = activity, FUN = mean),
       type = "l",
       lwd = 1,
       xlab = "Interval",
       ylab = "Number of steps",
       layout = c(1, 2))