library(dplyr)
##
## 次のパッケージを付け加えます: 'dplyr'
## 以下のオブジェクトは 'package:stats' からマスクされています:
##
## filter, lag
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## intersect, setdiff, setequal, union
library(tidyr)
library(lubridate)
## 要求されたパッケージ timechange をロード中です
##
## 次のパッケージを付け加えます: 'lubridate'
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## date, intersect, setdiff, union
# Load the raw activity records from the CSV file in the working directory.
df <- read.csv(file = "activity.csv")
## [1] 10766.19
# Average the step counts within each 5-minute interval across all days,
# skipping missing observations.
interval_5min <- summarize(
  group_by(df, interval),
  int_steps = mean(steps, na.rm = TRUE)
)

# Line chart of the average number of steps against the interval identifier.
with(
  interval_5min,
  plot(
    interval, int_steps,
    type = "l",
    xlab = "interval",
    ylab = "number of steps"
  )
)
# Highest average step count among the 5-minute intervals.
max_step <- max(interval_5min$int_steps)

# Keep the interval row(s) whose average equals that maximum, then show them.
max_5min <- filter(interval_5min, int_steps == max_step)
max_5min
## # A tibble: 1 × 2
## interval int_steps
## <int> <dbl>
## 1 835 206.
# Rows whose step count is missing.
include_NA <- filter(df, is.na(steps))

# Number of such rows.
number_NA <- nrow(include_NA)
number_NA
## [1] 2304
# Missing step counts are filled with the mean of the same 5-minute interval
# (computed over all days, ignoring NAs).
mean_steps <- summarize(
  group_by(df, interval),
  mean_steps = mean(steps, na.rm = TRUE)
)

# Attach each interval's mean to every record of that interval.
df_add_mean <- left_join(df, mean_steps, by = "interval")

# Use the recorded count when present, otherwise the interval mean; keep only
# the imputed steps plus the date and interval columns.
new_dataset <- select(
  mutate(df_add_mean, new_steps = ifelse(is.na(steps), mean_steps, steps)),
  new_steps, date, interval
)
# Sum the imputed step counts for each date.
new_steps_per_day <- summarize(
  group_by(new_dataset, date),
  total_steps = sum(new_steps)
)

# Histogram of the daily totals.
hist(new_steps_per_day$total_steps)

# Mean of the daily totals after imputation.
new_mean_steps_per_day <- mean(
  new_steps_per_day[["total_steps"]],
  na.rm = TRUE
)
new_mean_steps_per_day
## [1] 10766.19
# Median of the daily totals after imputation.
new_median_steps_per_day <- median(
  new_steps_per_day[["total_steps"]],
  na.rm = TRUE
)
new_median_steps_per_day
## [1] 10766.19
# Label every record as "weekday" or "weekend".
#
# The date is parsed explicitly and `week_start = 7` is passed so that the
# numeric codes are always 1 = Sunday ... 7 = Saturday, regardless of the
# `lubridate.week.start` option. Numeric codes (not day-name labels) are used
# because labels depend on the session locale.
# NOTE(review): assumes `date` holds ISO "YYYY-MM-DD" values; confirm.
new_dataset_weekday <- new_dataset %>%
  mutate(
    weekday = ifelse(
      wday(as.Date(date), week_start = 7) %in% 2:6,
      "weekday",
      "weekend"
    )
  )
Plot the weekday data as a red line and the weekend data as a blue line.
# Average the imputed step counts for every (day type, interval) pair.
# `.groups = "drop"` returns an ungrouped tibble, so later steps are not
# affected by leftover grouping and summarise() emits no grouping message.
interval_5min_weekday <- new_dataset_weekday %>%
  group_by(weekday, interval) %>%
  summarize(int_steps = mean(new_steps, na.rm = TRUE), .groups = "drop")
# Extract the weekday and the weekend averages.
weekday_data <- interval_5min_weekday %>%
  filter(weekday == "weekday")
weekend_data <- interval_5min_weekday %>%
  filter(weekday == "weekend")

# Draw both series on one set of axes. A shared y-range is computed first so
# the two lines use the same scale; overlaying a second plot() via
# par(new = TRUE) would rescale the y-axis for each series and overprint the
# axes.
steps_range <- range(
  weekday_data$int_steps, weekend_data$int_steps,
  na.rm = TRUE
)
plot(
  weekday_data$interval, weekday_data$int_steps,
  type = "l", col = "red", ylim = steps_range,
  xlab = "interval", ylab = "number of steps"
)
lines(weekend_data$interval, weekend_data$int_steps, col = "blue")
legend(
  "topright",
  legend = c("weekday", "weekend"),
  col = c("red", "blue"),
  lty = 1
)
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.