The dataset is stored in a comma-separated-value (CSV) file and contains a total of 17,568 observations.
- First, we load and preprocess the data. The activity.zip archive sits in the working directory, alongside PA1_template.Rmd and the other project files.
library(ggplot2)
# Load the activity data from the zip archive in the working directory.
#
# The previously recorded run failed inside unzip() and read.csv() then died
# with "invalid 'description' argument", which hides the real cause (the
# archive could not be extracted). Each step is now checked explicitly so a
# failure names the actual problem.
#
# The accompanying text calls the archive "activity.zip", so that name is
# accepted as a fallback when the original name is absent.
# NOTE(review): confirm which archive name the repository actually ships.
zip_candidates <- c("repdata-data-activity.zip", "activity.zip")
zip_path <- zip_candidates[file.exists(zip_candidates)][1]
if (is.na(zip_path)) {
  stop(
    "Activity archive not found in ", getwd(), "; looked for: ",
    paste(zip_candidates, collapse = ", "),
    call. = FALSE
  )
}

# unzip() extracts into the working directory (as before) and returns the
# paths of the extracted files; keep only the CSV so stray archive members
# cannot be handed to read.csv().
extracted <- unzip(zip_path)
csv_path <- grep("\\.csv$", extracted, value = TRUE, ignore.case = TRUE)
if (length(csv_path) != 1L) {
  stop(
    "Expected exactly one CSV file in ", zip_path, ", found ",
    length(csv_path),
    call. = FALSE
  )
}
act <- read.csv(csv_path)
- Format dates to the appropriate type
# Parse the year-month-day text in the `date` column into Date values.
act[["date"]] <- as.Date(act[["date"]], format = "%Y-%m-%d")
--- .class #3
Slide 3
- From the original data, build the per-day and per-interval step totals and name their columns steps, date and interval
# Total steps per day. aggregate() returns its columns as (Group.1, x), which
# are relabelled here as (date, steps). sum() is called without na.rm, so any
# group containing a missing value totals NA.
act.day <- setNames(
  aggregate(act$steps, by = list(act$date), FUN = sum),
  c("date", "steps")
)

# Total steps per interval, relabelled as (interval, steps).
act.interval <- setNames(
  aggregate(act$steps, by = list(act$interval), FUN = sum),
  c("interval", "steps")
)
- Next, from the original data, we compute the mean number of steps per interval and name the resulting column mean.steps
# Mean number of steps for each interval, ignoring missing values.
#
# `na.rm = TRUE` is forwarded by aggregate() to mean(). The former
# `na.action = NULL` argument was dropped: `na.action` is an argument of
# aggregate()'s formula method only, so in this vector/`by` call it was
# forwarded to mean() as well, where it is silently ignored.
act.m.interval <- aggregate(
  act$steps,
  by = list(act$interval),
  FUN = mean,
  na.rm = TRUE
)

# aggregate() labels its output columns (Group.1, x); give them real names.
names(act.m.interval) <- c("interval", "mean.steps")
--- .class #id
Slide 4
First Question: What is the mean total number of steps taken per day?
We'll calculate both MEAN and MEDIAN:
# Mean and median of the daily step totals; NA totals are excluded.
mean(act.day[["steps"]], na.rm = TRUE)
median(act.day[["steps"]], na.rm = TRUE)
Note that the summary command also shows the number of NA values in the set
# Summary statistics of the daily step totals.
summary(act.day[["steps"]])
- And the requested histogram:
# Histogram of the daily step totals.
hist(
  x = act.day$steps,
  main = "Histogram of Total Number of Steps per Day",
  xlab = "Total Number of Steps per Day",
  col = "lavender"
)