## Function definition part
# Plot a histogram of the total number of steps per day and report the
# mean and median of those daily totals.
#
# Args:
#   par_activity: data frame with a `steps` column and a `date` column.
#
# Side effects:
#   Prints the histogram (ggplot2 must be attached) and two summary lines.
#
# Returns:
#   Invisibly, the median message string (the value of the final print()).
func_totalstepseachday <- function(par_activity) {
  # With na.rm = TRUE a day whose steps are all NA sums to 0, so such days
  # stay in the histogram, the mean and the median as zero-step days.
  totalstepseachday <- with(par_activity,
                            tapply(steps, date, sum, na.rm = TRUE))
  totalstepseachdaydf <- data.frame(
    date = as.Date(as.character(names(totalstepseachday))),
    totalsteps = totalstepseachday)

  # Aim for roughly 30 bins across the observed range. When all totals are
  # identical (or there is no data) the computed width is 0 or non-finite,
  # which geom_histogram() rejects, so fall back to a width of one step.
  histbinwidth <- diff(range(totalstepseachdaydf$totalsteps)) / 30
  if (!is.finite(histbinwidth) || histbinwidth <= 0) {
    histbinwidth <- 1
  }

  h1 <- ggplot(totalstepseachdaydf, aes(x = totalsteps)) +
    geom_histogram(binwidth = histbinwidth,
                   fill = "white", color = "black") +
    xlab("Total Steps Each Day") +
    ggtitle("Total Number of Steps Taken Each Day") +
    theme(plot.title = element_text(size = 20))
  # Explicit print(): ggplot objects are not auto-printed inside a function.
  print(h1)

  meantotalstepseachday <- format(mean(totalstepseachday), digits = 6)
  mediantotalstepseachday <- format(median(totalstepseachday), digits = 6)
  print(paste0("The mean total number of steps take per day is : ",
               meantotalstepseachday))
  print(paste0("The median total number of steps take per day is : ",
               mediantotalstepseachday))
}
# Average the step counts of every interval over all days.
#
# Args:
#   par_activity: data frame with a `steps` column and an `interval` column.
#
# Returns:
#   Data frame with one row per distinct interval:
#     interval - the interval identifier left-padded with zeros and cut to
#                its last four characters (e.g. 5 -> "0005", 835 -> "0835")
#     avgsteps - mean of `steps` for that interval, NAs ignored
func_avgstepsperintervaldf <- function(par_activity) {
  intervalmeans <- tapply(par_activity[["steps"]],
                          par_activity[["interval"]],
                          mean, na.rm = TRUE)

  # Prefix three zeros, then keep only the trailing four characters.
  padded <- paste0("000", as.character(names(intervalmeans)))
  lastchar <- nchar(padded)

  data.frame(
    interval = substr(padded, lastchar - 3, lastchar),
    avgsteps = intervalmeans)
}
## Execution part
library(ggplot2)
library(reshape2)
# Read the raw activity log from the working directory, then plot and
# summarise the daily step totals of the unmodified data.
activity <- read.csv(file = "activity.csv")
func_totalstepseachday(par_activity = activity)
## [1] "The mean total number of steps take per day is : 9354.23"
## [1] "The median total number of steps take per day is : 10395"
# Axis ticks every two hours: break positions are zero-padded "HHMM" labels
# ("0000", "0200", ..., "2200"); the displayed text is "HH:MM".
intervallabelpos <- sprintf("%02d00", seq(from = 0, to = 23, by = 2))
intervallabelupperpart <- substr(intervallabelpos, 1, 2)
intervallabeldownpart <- substr(intervallabelpos, 3, 4)
intervallabeltext <- paste(intervallabelupperpart, intervallabeldownpart,
                           sep = ":")

# Mean steps per interval across all days, drawn as a single line over the
# discrete, zero-padded interval labels.
df_avgstepsperinterval <- func_avgstepsperintervaldf(activity)
h2 <- ggplot(df_avgstepsperinterval,
             aes(x = interval, y = avgsteps, group = 1)) +
  geom_line() +
  scale_x_discrete(breaks = intervallabelpos, labels = intervallabeltext) +
  labs(x = "Data Collection Interval",
       y = "Average Steps ",
       title = "Average Number of Steps Across All Days") +
  theme(plot.title = element_text(size = 20))
h2
# Row names of df_avgstepsperinterval carry the unpadded interval ids, so the
# row name at the position of the largest average identifies the peak interval.
maxindex <- row.names(df_avgstepsperinterval)[
  which.max(df_avgstepsperinterval[["avgsteps"]])]
print(paste0("The interval containing the maximum number of steps is : ",
             maxindex))
## [1] "The interval containing the maximum number of steps is : 835"

# Count the rows whose step count is missing.
nasum <- sum(is.na(activity[["steps"]]))
print(paste0("The total number of missing values in the dataset is : ",
             nasum))
## [1] "The total number of missing values in the dataset is : 2304"
## Imputation strategy: replace each NA with the average number of steps of the same interval across all days:
# Impute missing step counts: each NA in `steps` is replaced by the mean
# number of steps recorded for the same interval across all days, taken from
# df_avgstepsperinterval (its row names are the unpadded interval ids).
activitytofillNA <- activity
naindex <- which(is.na(activitytofillNA$steps))
intervalindex <- activitytofillNA$interval[naindex]

# Vectorised lookup instead of scanning the names once per missing value;
# it also works when there are no NAs at all (zero-length index vectors).
lookuppos <- match(as.character(intervalindex),
                   row.names(df_avgstepsperinterval))
if (any(is.na(lookuppos))) {
  stop("No interval average available for some rows with missing steps",
       call. = FALSE)
}
# as.numeric() drops the array attributes of the avgsteps column.
replacenanumeric <- as.numeric(df_avgstepsperinterval$avgsteps[lookuppos])
activitytofillNA$steps[naindex] <- replacenanumeric

# Same daily-total summary as before, now on the imputed data.
func_totalstepseachday(activitytofillNA)
## [1] "The mean total number of steps take per day is : 10766.2"
## [1] "The median total number of steps take per day is : 10766.2"
# Add a two-level factor `weekdayfactor` ("weekday" / "weekend") to the
# imputed data.
df_activityaddweekdayfactor <- data.frame(activitytofillNA,
  weekdayfactor = character(nrow(activitytofillNA)),
  stringsAsFactors = FALSE)

# Classify by the numeric day of week rather than weekdays(): weekdays()
# returns names in the current locale, so comparing against "Saturday" /
# "Sunday" silently labels every row "weekday" outside English locales.
# POSIXlt's wday runs 0-6 starting on Sunday, hence 0 and 6 are the weekend.
# Column 2 is used as the date, exactly as before.
# NOTE(review): positional access assumes the date is the second column -
# confirm against activity.csv.
daynumber <- as.POSIXlt(as.Date(df_activityaddweekdayfactor[[2]]))$wday
isweekend <- daynumber %in% c(0, 6)

# Rows whose date cannot be classified (NA) stay "weekday", as before.
df_activityaddweekdayfactor$weekdayfactor <-
  as.factor(ifelse(isweekend, "weekend", "weekday"))
# Wide table of mean steps: one row per interval, one column per
# weekdayfactor level ("weekday", "weekend").
df_avgstepsperintervalconnected <- as.data.frame(
  with(df_activityaddweekdayfactor,
       tapply(steps, list(interval, weekdayfactor), mean)))

# The row names are the interval ids; expose them as a numeric column.
df_avgstepsperintervalconnected$interval <-
  as.numeric(row.names(df_avgstepsperintervalconnected))

# Long format for faceting: interval, weekdayfactor, avgsteps.
df_avgstepsperintervalmelt <- melt(df_avgstepsperintervalconnected,
                                   id.vars = "interval",
                                   measure.vars = c("weekday", "weekend"),
                                   variable.name = "weekdayfactor",
                                   value.name = "avgsteps")
# Disabled alternative: the if(FALSE) guard means nothing in this block ever
# runs. It builds the per-interval averages separately for the "weekday" and
# the "weekend" rows with func_avgstepsperintervaldf() (passing columns 1:3),
# tags each result with its weekdayfactor label and stacks the two with rbind().
# If it were enabled it would overwrite df_avgstepsperintervalconnected with
# that stacked (long) data frame.
if(FALSE){
# Averages for the rows labelled "weekday".
df_avgstepsperintervalweekday <-
func_avgstepsperintervaldf(df_activityaddweekdayfactor
[as.character(df_activityaddweekdayfactor$weekdayfactor) ==
"weekday", 1:3])
df_avgstepsperintervalweekday <- data.frame(df_avgstepsperintervalweekday,
weekdayfactor = "weekday",
stringsAsFactors = FALSE)
# Averages for the rows labelled "weekend".
df_avgstepsperintervalweekend <-
func_avgstepsperintervaldf(df_activityaddweekdayfactor
[as.character(df_activityaddweekdayfactor$weekdayfactor) ==
"weekend", 1:3])
df_avgstepsperintervalweekend <- data.frame(df_avgstepsperintervalweekend,
weekdayfactor = "weekend",
stringsAsFactors = FALSE)
# Stack both groups into one data frame.
df_avgstepsperintervalconnected <- rbind(df_avgstepsperintervalweekday,
df_avgstepsperintervalweekend)
}
# Replace the interval ids by their running position within each
# weekdayfactor group (1, 2, ..., number of intervals), giving consecutive
# x positions. The index is derived from the data instead of the previous
# hard-coded rep(1:288, 2), which only fits exactly 288 intervals in
# exactly two groups.
df_avgstepsperintervalmelt$interval <- ave(
  seq_len(nrow(df_avgstepsperintervalmelt)),
  df_avgstepsperintervalmelt$weekdayfactor,
  FUN = seq_along)

# One panel per weekdayfactor level, each showing the mean steps per interval.
h3 <- ggplot(df_avgstepsperintervalmelt,
             aes(x = interval, y = avgsteps, group = 1)) +
  geom_line() +
  facet_grid(weekdayfactor ~ ., as.table = FALSE) +
  #scale_x_discrete(breaks = intervallabelpos, labels = intervallabeltext) +
  xlab("Data Collection Interval") +
  ylab("Average Steps ") +
  ggtitle("Comparison of Average Number of Steps \nBetween Weekdays and Weekend") +
  theme(plot.title = element_text(size = 20))
h3