Reading in the data and getting a feel for its structure
# Load the raw activity log from the working directory.
activitydata <- read.csv(file = "activity.csv")
# Per-column summary: ranges, quartiles and NA counts.
print(summary(object = activitydata))
## steps date interval
## Min. : 0.00 2012-10-01: 288 Min. : 0.0
## 1st Qu.: 0.00 2012-10-02: 288 1st Qu.: 588.8
## Median : 0.00 2012-10-03: 288 Median :1177.5
## Mean : 37.38 2012-10-04: 288 Mean :1177.5
## 3rd Qu.: 12.00 2012-10-05: 288 3rd Qu.:1766.2
## Max. :806.00 2012-10-06: 288 Max. :2355.0
## NA's :2304 (Other) :15840
# Preview the first six rows of the raw data.
print(head(x = activitydata, n = 6L))
## steps date interval
## 1 NA 2012-10-01 0
## 2 NA 2012-10-01 5
## 3 NA 2012-10-01 10
## 4 NA 2012-10-01 15
## 5 NA 2012-10-01 20
## 6 NA 2012-10-01 25
Creating a histogram of the total number of steps taken each day and finding the mean and median of those daily totals
# Daily step totals. sum() is called without na.rm, so any day that contains
# a missing reading gets a total of NA.
totalsteps <- aggregate(
  x = activitydata$steps,
  by = list(activitydata$date),
  FUN = sum
)
print(head(x = totalsteps, n = 6L))
## Group.1 x
## 1 2012-10-01 NA
## 2 2012-10-02 126
## 3 2012-10-03 11352
## 4 2012-10-04 12116
## 5 2012-10-05 13294
## 6 2012-10-06 15420
# Distribution of the daily totals (NA days are ignored by hist()).
hist(x = totalsteps$x, main = "Steps per Day", xlab = "Number of Steps")
# Mean of the daily totals, skipping days whose total is NA.
print(mean(x = totalsteps$x, na.rm = TRUE))
## [1] 10766.19
# Median of the daily totals, skipping days whose total is NA.
print(median(x = totalsteps$x, na.rm = TRUE))
## [1] 10765
Finding the average daily activity pattern using a time series plot
# Mean number of steps for each 5-minute interval, averaged across all days
# (missing readings are dropped via na.rm = TRUE).
# The grouping key is the numeric interval itself, not factor(interval):
# with a factor in Group.1, plot(Group.1, x, type = "l") dispatches to
# plot.factor() and draws box plots instead of a time-series line.
averagesteps <- aggregate(activitydata$steps, list(activitydata$interval),
                          mean, na.rm = TRUE)
print(head(averagesteps))
## Group.1 x
## 1 0 1.7169811
## 2 5 0.3396226
## 3 10 0.1320755
## 4 15 0.1509434
## 5 20 0.0754717
## 6 25 2.0943396
# Plot the per-interval average against the interval identifier.
plot(x = averagesteps$Group.1, y = averagesteps$x, type = "l")
# Show the row holding the largest average, i.e. the interval with the most steps.
print(averagesteps[which.max(averagesteps$x), , drop = FALSE])
## Group.1 x
## 104 835 206.1698
Working with missing values
# Total number of missing values.
# NAs is a logical matrix with one cell per value of activitydata.
NAs <- is.na(activitydata)
total_missing <- sum(NAs)
print(total_missing)
## [1] 2304
# Rows that contain at least one missing value. Collapsing the matrix to one
# flag per row avoids indexing rows with a mask longer than nrow().
NAdataset <- activitydata[rowSums(NAs) > 0, ]
# The NAs are filled in with the mean of the corresponding interval;
# this builds the per-interval lookup table used for that.
avgsteps_perint <- aggregate(activitydata$steps, list(activitydata$interval),
                             mean, na.rm = TRUE)
print(head(avgsteps_perint))
## Group.1 x
## 1 0 1.7169811
## 2 5 0.3396226
## 3 10 0.1320755
## 4 15 0.1509434
## 5 20 0.0754717
## 6 25 2.0943396
# Give the aggregate() default columns meaningful names so the lookup table
# can be joined on "interval".
colnames(avgsteps_perint)[colnames(avgsteps_perint) == "Group.1"] <- "interval"
colnames(avgsteps_perint)[colnames(avgsteps_perint) == "x"] <- "average_value"
# After the merge every row carries the mean for its own interval.
mergeddataset <- merge(activitydata, avgsteps_perint, by = "interval")
moreNAs <- is.na(mergeddataset$steps)
# Impute only the steps column, taking the value from the same row.
# Assigning whole rows of the 288-row lookup table (indexed by a 17568-long
# mask) overwrote interval and date as well, which produced the
# "invalid factor level, NA generated" warning and <NA> dates.
mergeddataset$steps[moreNAs] <- mergeddataset$average_value[moreNAs]
# NOTE(review): any printed output recorded after this call predates the fix;
# imputed rows now keep their original date instead of <NA>. Re-run to refresh.
print(head(mergeddataset))
## interval steps date average_value
## 1 0 1.716981 <NA> 1.716981
## 2 0 0.000000 2012-11-23 1.716981
## 3 0 0.000000 2012-10-28 1.716981
## 4 0 0.000000 2012-11-06 1.716981
## 5 0 0.000000 2012-11-24 1.716981
## 6 0 0.000000 2012-11-15 1.716981
# Daily step totals recomputed from the imputed data set.
newtotalsteps <- aggregate(
  x = mergeddataset$steps,
  by = list(mergeddataset$date),
  FUN = sum
)
print(head(x = newtotalsteps, n = 6L))
## Group.1 x
## 1 2012-10-02 126
## 2 2012-10-03 11352
## 3 2012-10-04 12116
## 4 2012-10-05 13294
## 5 2012-10-06 15420
## 6 2012-10-07 11015
# Histogram of the daily totals after imputation.
hist(x = newtotalsteps$x, col = "purple")
Checking for differences in activity patterns between weekdays and weekends
# Parse the date column into date-times so the day of the week can be derived.
activitydata$date <- as.POSIXct(strptime(activitydata$date, "%Y-%m-%d"))
print(length(activitydata$date))
## [1] 17568
# Label every row as "weekend" or "weekday" in one vectorised step.
# POSIXlt$wday is the numeric day of the week (0 = Sunday ... 6 = Saturday),
# so unlike comparing weekdays() against English day names this does not
# depend on the session locale.
is_weekend <- as.POSIXlt(activitydata$date)$wday %in% c(0L, 6L)
activitydata$day <- ifelse(is_weekend, "weekend", "weekday")
print(unique(activitydata$day))
## [1] "weekday" "weekend"
# Split the observations by day type.
weekendDAYS <- activitydata[which(activitydata$day == "weekend"), ]
weekdayDAYS <- activitydata[which(activitydata$day == "weekday"), ]
# Average steps per interval within each subset, ignoring missing readings.
WKNDaveragesteps <- aggregate(
  x = weekendDAYS$steps,
  by = list(weekendDAYS$interval),
  FUN = mean,
  na.rm = TRUE
)
WKDYaveragesteps <- aggregate(
  x = weekdayDAYS$steps,
  by = list(weekdayDAYS$interval),
  FUN = mean,
  na.rm = TRUE
)
# Two stacked panels sharing the same y-range so the profiles are comparable.
par(mfrow = c(2, 1))
plot(
  x = WKDYaveragesteps$Group.1,
  y = WKDYaveragesteps$x,
  type = "l",
  col = "blue",
  main = "AVG Steps(Weekdays)",
  xlab = "Interval",
  ylab = "Steps",
  ylim = c(0, 250)
)
plot(
  x = WKNDaveragesteps$Group.1,
  y = WKNDaveragesteps$x,
  type = "l",
  col = "red",
  main = "AVG Steps(Weekends)",
  xlab = "Interval",
  ylab = "Steps",
  ylim = c(0, 250)
)