# Load the raw activity log.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file exists; consider a path relative to the project instead.
data <- read.csv("E:/Nada/Others/Courses/Data Science Specialization/Ex/Course 5/activity.csv")

# Keep only the rows whose step count was actually recorded.
processedData <- data[!is.na(data$steps), ]

# Total number of steps per date (result columns: `date`, `x`).
sums <- aggregate(
  as.numeric(processedData$steps),
  by = list(date = processedData$date),
  FUN = sum
)

# The series is a per-day TOTAL indexed by date, so label it as such and
# convert the date column to Date so the line plot gets a real time axis
# (a raw factor/character x does not produce a line plot).
plot(
  as.Date(sums$date),
  sums$x,
  type = "l",
  main = "Total Number of Steps Taken Each Day",
  xlab = "Date",
  ylab = "Total Number of Steps"
)
# Mean step count for each interval identifier, computed over the rows that
# have a recorded step count (result columns: `interval`, `x`).
avg <- aggregate(
  x = as.numeric(processedData[["steps"]]),
  by = list(interval = processedData[["interval"]]),
  FUN = mean
)

# Line plot of the per-interval means.
plot(
  x = avg[["interval"]],
  y = avg[["x"]],
  type = "l",
  main = "The Average Number of Steps Taken Across all Days",
  xlab = "Interval",
  ylab = "Average Number of Steps"
)
The maximum of the per-interval average step counts is:
# Largest of the per-interval mean step counts.
max(avg[["x"]])
## [1] 206.1698
The number of missing values is
# Count the missing cells: is.na() yields a logical matrix and summing it
# counts the TRUE entries. (Wrapping it in which() would add up the POSITIONS
# of the missing cells instead of counting them.)
sum(is.na(data))
## NOTE: the value 21483648 shown by the earlier version of this chunk was the
## sum of the NA positions, not a count; re-run to refresh the printed output.
# Impute every missing step count with the mean for the same interval.

# Row positions whose step count is missing. Testing the `steps` column
# directly (rather than the whole data frame) keeps these genuine row indices
# even if another column ever contains an NA.
index <- which(is.na(data$steps))
navalues <- data[index, ]

# Look up each missing row's interval in `avg` explicitly instead of relying
# on vector recycling, which is only correct when the missing rows happen to
# form complete, ordered days.
navalues$steps <- avg$x[match(navalues$interval, avg$interval)]

# Copy of the original data with the gaps filled in.
finalData <- data
finalData$steps[index] <- navalues$steps

# Total number of steps per date after imputation (columns: `date`, `x`).
sums <- aggregate(
  as.numeric(finalData$steps),
  by = list(date = finalData$date),
  FUN = sum
)

# Per-day totals indexed by date: label accordingly and use a Date x-axis.
plot(
  as.Date(sums$date),
  sums$x,
  type = "l",
  main = "Total Number of Steps Taken Each Day (Missing Values Imputed)",
  xlab = "Date",
  ylab = "Total Number of Steps"
)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Name of the day of the week for every observation, stored both as a
# standalone vector and as a new `weekday` column on the imputed data.
week <- weekdays(as.Date(finalData[["date"]]))
finalData[["weekday"]] <- week