## Load the activity data and summarise the total number of steps per day.
## Expects "activity.csv" in the working directory with a `steps` column and
## a `date` column formatted as %Y-%m-%d.
data <- read.csv("activity.csv", na.strings = "NA")
data$date <- as.Date(data$date, format = "%Y-%m-%d")

## Rows containing any missing value are excluded from this first summary.
dataAc <- data[complete.cases(data), ]

## The grouping vector must come from the filtered frame. Using `data$date`
## (which is longer than `dataAc`) raises "data length is not a multiple of
## split variable" and misaligns rows and dates.
s <- split(dataAc, dataAc$date)

## One total per day; vapply guarantees a numeric vector even for odd input.
total_steps <- vapply(s, function(day) sum(day[["steps"]]), numeric(1))
hist(total_steps)

## Mean and median of the daily totals.
m <- mean(total_steps)
md <- median(total_steps)
Mean of total number of steps taken per day = 9354.2295
Median of total number of steps taken per day = 10395
## Average daily activity pattern: for every interval, the mean number of
## steps across all days, ignoring missing values.
da <- split(data, data$interval)
daily_activity <- vapply(
  da,
  function(chunk) mean(chunk[["steps"]], na.rm = TRUE),
  numeric(1)
)

## Two-column frame: the interval identifier (taken from the split names)
## and the corresponding mean step count.
data_dailyActivity <- data.frame(
  interval = as.numeric(as.integer(names(daily_activity))),
  steps = as.numeric(unname(daily_activity))
)

## Time-series line plot of the averaged pattern.
plot(
  x = data_dailyActivity$interval,
  y = data_dailyActivity$steps,
  type = "l",
  xlab = "interval",
  ylab = "steps"
)

## Interval identifier of the first row whose mean equals the overall maximum.
max_steps <- data_dailyActivity$interval[
  which(data_dailyActivity$steps == max(data_dailyActivity$steps))[1]
]
Interval 835, on average across all the days in the dataset, contains the maximum number of steps.
## Number of rows that contain at least one missing value.
missing_values <- sum(!complete.cases(data))
There are 2304 rows in the dataset that include NA values. My strategy for filling in these missing values is to use the mean for the corresponding 5-minute interval.
## Add a column holding the average steps for each row's interval. The value
## is looked up by interval identifier instead of relying on vector
## recycling, so it stays correct even if rows are reordered or a day is
## missing some intervals.
interval_row <- match(data$interval, data_dailyActivity$interval)
data$averageStepsPerInterval <- data_dailyActivity$steps[interval_row]

## Replace every missing step count with that interval average (vectorised;
## also safe when the data frame has zero rows).
missing_steps <- is.na(data$steps)
data$steps[missing_steps] <- data$averageStepsPerInterval[missing_steps]

## Histogram of the total number of steps taken each day after imputation,
## followed by the mean and median of those daily totals.
n <- split(data, data$date)
new_total_steps <- vapply(n, function(day) sum(day[["steps"]]), numeric(1))
hist(new_total_steps)
m <- mean(new_total_steps)
md <- median(new_total_steps)
Mean of total number of steps taken per day after NA replacement = $1.0766 \times 10^{4}$
Median of total number of steps taken per day after NA replacement = $1.0766 \times 10^{4}$
Median and mean values changed.
library(ggplot2)
## Label every observation as "weekday" or "weekend".
## The day of week is derived from the Date itself (POSIXlt `wday`:
## 0 = Sunday, 6 = Saturday), which is locale-independent. Comparing the
## date value with a day name such as "Saturday" can never match and fails
## with "character string is not in a standard unambiguous format".
data$WeekdayorWeekend <- ifelse(
  as.POSIXlt(data$date)$wday %in% c(0L, 6L),
  "weekend",
  "weekday"
)

## Mean steps per interval, computed separately for weekdays and weekends so
## that each plot panel shows its own activity pattern.
new_data_dailyActivity <- aggregate(
  steps ~ interval + WeekdayorWeekend,
  data = data,
  FUN = mean
)
colnames(new_data_dailyActivity) <- c("interval", "wd", "steps")
## Keep the column order interval, steps, wd.
new_data_dailyActivity <- new_data_dailyActivity[, c("interval", "steps", "wd")]
new_data_dailyActivity$interval <- as.integer(new_data_dailyActivity$interval)
new_data_dailyActivity$steps <- as.numeric(new_data_dailyActivity$steps)

## Line plot of the averaged pattern, one panel per day type.
q <- ggplot(new_data_dailyActivity, aes(y = steps, x = interval))
q + facet_grid(. ~ wd) + geom_line()
## Scratch ggplot2 calls plotting `rating` against `votes` from `movies`.
## NOTE(review): `movies` is not defined anywhere in the visible part of this
## file -- confirm where it is expected to come from.
## NOTE(review): library() with no arguments only lists the available
## packages; presumably a package name was intended -- confirm.
library()
qplot(votes, rating, data = movies)
qplot(votes, rating, data = movies, panel = panel.loess)
## Error: object 'panel.loess' not found
## NOTE(review): the call above fails as recorded; `panel.loess` is not
## defined in the visible code.
## NOTE(review): `smooth` does not look like a documented qplot argument --
## verify that this call actually adds a loess smoother.
qplot(votes, rating, data = movies, smooth = "loess")