1.

Reading in the data/getting a feel for the data

# Load the raw activity log; activity.csv must be in the working directory.
activitydata <- read.csv(file = "activity.csv")
# Column-by-column overview: value ranges, NA count, and row counts per date.
print(summary(object = activitydata))
##      steps                date          interval     
##  Min.   :  0.00   2012-10-01:  288   Min.   :   0.0  
##  1st Qu.:  0.00   2012-10-02:  288   1st Qu.: 588.8  
##  Median :  0.00   2012-10-03:  288   Median :1177.5  
##  Mean   : 37.38   2012-10-04:  288   Mean   :1177.5  
##  3rd Qu.: 12.00   2012-10-05:  288   3rd Qu.:1766.2  
##  Max.   :806.00   2012-10-06:  288   Max.   :2355.0  
##  NA's   :2304     (Other)   :15840
# First six rows, to see the column layout.
print(head(x = activitydata, n = 6L))
##   steps       date interval
## 1    NA 2012-10-01        0
## 2    NA 2012-10-01        5
## 3    NA 2012-10-01       10
## 4    NA 2012-10-01       15
## 5    NA 2012-10-01       20
## 6    NA 2012-10-01       25

2.

Creating a histogram of the total number of steps taken each day, and finding the mean and median of those daily totals

# Total steps per date. sum() is called without na.rm, so a date whose step
# counts are missing gets an NA total.
totalsteps <- aggregate(
  x = activitydata[["steps"]],
  by = list(activitydata[["date"]]),
  FUN = sum
)
print(head(totalsteps))
##      Group.1     x
## 1 2012-10-01    NA
## 2 2012-10-02   126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
# Distribution of the daily totals.
hist(totalsteps[["x"]], xlab = "Number of Steps", main = "Steps per Day")

# Mean and median of the daily totals, skipping dates whose total is NA.
print(mean(totalsteps[["x"]], na.rm = TRUE))
## [1] 10766.19
print(median(totalsteps[["x"]], na.rm = TRUE))
## [1] 10765

3.

Finding the average daily activity pattern using a time series plot

# Mean number of steps for each interval, averaged over all dates (NAs dropped).
# The interval is deliberately kept numeric rather than wrapped in factor():
# with a factor on the x axis plot() dispatches to plot.factor(), which draws
# box plots instead of the line requested by type = "l".
averagesteps <- aggregate(activitydata$steps, list(activitydata$interval),
                          mean, na.rm = TRUE)
print(head(averagesteps))
##   Group.1         x
## 1       0 1.7169811
## 2       5 0.3396226
## 3      10 0.1320755
## 4      15 0.1509434
## 5      20 0.0754717
## 6      25 2.0943396
# Time series of the average steps across the intervals of a day.
plot(averagesteps$Group.1, averagesteps$x, type = "l",
     xlab = "Interval", ylab = "Steps", main = "AVG Steps per Interval")

# Print the row with the maximum average value to see which interval has the
# highest average number of steps.
print(averagesteps[which.max(averagesteps$x), ])
##     Group.1        x
## 104     835 206.1698

4.

Working with missing values

# Total number of missing values.
# is.na() on a data frame returns a logical matrix with one cell per value, so
# sum() over it counts the missing cells. The count was previously never
# computed or reported.
NAs <- is.na(activitydata)
print(sum(NAs))
# Rows that contain at least one missing value. Rows are selected with a
# per-row flag rather than with the whole cell matrix, whose length is not
# the number of rows.
NAdataset <- activitydata[rowSums(NAs) > 0, ]
# The NAs are filled in with the mean at each interval; compute those
# per-interval means here (missing step counts are ignored).
avgsteps_perint <- aggregate(
  x = activitydata[["steps"]],
  by = list(activitydata[["interval"]]),
  FUN = mean,
  na.rm = TRUE
)
print(head(avgsteps_perint))
##   Group.1         x
## 1       0 1.7169811
## 2       5 0.3396226
## 3      10 0.1320755
## 4      15 0.1509434
## 5      20 0.0754717
## 6      25 2.0943396
# Replace aggregate()'s default column names with descriptive ones.
names(avgsteps_perint)[names(avgsteps_perint) == "Group.1"] <- "interval"
names(avgsteps_perint)[names(avgsteps_perint) == "x"] <- "average_value"
# Attach each interval's mean (average_value) to every observation. merge()
# returns the rows ordered by interval, not in the original date order.
mergeddataset <- merge(activitydata, avgsteps_perint, by = "interval")
# Impute: replace only the missing step counts with their interval's mean.
# The earlier version assigned whole rows of avgsteps_perint over the NA rows,
# which set their date to NA (with an "invalid factor level" warning) and so
# dropped imputed dates such as 2012-10-01 from the daily totals below.
moreNAs <- is.na(mergeddataset$steps)
mergeddataset$steps[moreNAs] <- mergeddataset$average_value[moreNAs]
print(head(mergeddataset))
# NOTE: the previously captured output for this block showed the corrupted
# rows; re-run the analysis to regenerate the printed output.
# Daily totals after imputation; no date should have an NA total any more.
newtotalsteps <- aggregate(mergeddataset$steps, list(mergeddataset$date), sum)
print(head(newtotalsteps))
hist(newtotalsteps$x, col = "purple", xlab = "Number of Steps",
     main = "Steps per Day (NAs imputed)")

5.

Checking for differences in activity patterns on weekdays versus weekends

# Parse the date column ("YYYY-MM-DD") into POSIXct date-times.
activitydata$date <- as.POSIXct(strptime(activitydata$date, "%Y-%m-%d"))
print(length(activitydata$date))
## [1] 17568
# Label every observation as "weekend" or "weekday" in one vectorised step.
# POSIXlt's wday field is numeric (0 = Sunday, ..., 6 = Saturday), so this
# test does not depend on the session locale, unlike comparing weekdays()
# against the English names "Sunday" and "Saturday".
is_weekend <- as.POSIXlt(activitydata$date)$wday %in% c(0L, 6L)
activitydata$day <- ifelse(is_weekend, "weekend", "weekday")
print(unique(activitydata$day))
## [1] "weekday" "weekend"
# Separate the observations by day type.
weekendDAYS <- activitydata[which(activitydata$day == "weekend"), ]
weekdayDAYS <- activitydata[which(activitydata$day == "weekday"), ]
# Mean steps per interval within each day type (missing counts ignored).
WKNDaveragesteps <- aggregate(
  x = weekendDAYS$steps,
  by = list(weekendDAYS$interval),
  FUN = mean,
  na.rm = TRUE
)
WKDYaveragesteps <- aggregate(
  x = weekdayDAYS$steps,
  by = list(weekdayDAYS$interval),
  FUN = mean,
  na.rm = TRUE
)
# Two stacked panels with the same y range so the profiles can be compared.
par(mfrow = c(2, 1))
with(
  WKDYaveragesteps,
  plot(Group.1, x, type = "l", col = "blue", xlab = "Interval",
       ylab = "Steps", main = "AVG Steps(Weekdays)", ylim = c(0, 250))
)
with(
  WKNDaveragesteps,
  plot(Group.1, x, type = "l", col = "red", xlab = "Interval",
       ylab = "Steps", main = "AVG Steps(Weekends)", ylim = c(0, 250))
)