## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

```r
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
```

## Bellabeat Case Study

```{r install-packages, eval=FALSE}
# Install the packages used in this analysis.
# eval=FALSE: run these once from the console; install.packages() in a
# non-interactive knit fails when no CRAN mirror is configured.
install.packages("tidyverse")
install.packages("janitor")
install.packages("skimr")
install.packages("arsenal")
```

```{r load-packages}
# Attach the packages used by the chunks in this document.
library(tidyverse)
library(skimr)
library(janitor)
library(arsenal)

# Manual steps performed before importing the data:
# upload data
# choose zipfile
# import dataset
```

```{r import-data}
# Create the two daily-activity data frames and the sleep data frame from
# CSV files expected in the working directory.
# NOTE(review): "dailyActiivty" in the second file name looks like a typo for
# "dailyActivity"; the path is kept as written -- confirm against the file on
# disk before renaming either one.
daily_activity1 <- read.csv("dailyActivity_merged_3.12.16_4.11.16.csv")
daily_activity2 <- read.csv("dailyActiivty_merged_4.12.16_5.12.16.csv")
sleep <- read.csv("sleepDay_merged_4.12.16_5.12.16[1].csv")
```

```{r preview-data}
# Show the first rows of each data frame.
head(daily_activity1)
head(daily_activity2)
head(sleep)
```

```{r row-counts}
# Number of rows in each data frame.
nrow(daily_activity1)
nrow(daily_activity2)
nrow(sleep)
```

#cleaning the datasets

```{r distinct-ids}
# Count the distinct values of the Id column in each data frame.
n_distinct(daily_activity1$Id)
n_distinct(daily_activity2$Id)
n_distinct(sleep$Id)
```

```{r duplicate-rows}
# Count fully duplicated rows in each data frame (this only reports the
# counts; no rows are removed here).
sum(duplicated(daily_activity1))
sum(duplicated(daily_activity2))
sum(duplicated(sleep))
```

```{r clean-names}
# Show each data frame with janitor-cleaned column names.
# NOTE(review): the results are not assigned, so the data frames keep their
# original column names. The rest of this document refers to those original
# names (ActivityDate, TotalSteps, ...), so assigning the cleaned versions
# would require updating every later chunk as well.
clean_names(daily_activity1)
clean_names(daily_activity2)
clean_names(sleep)
```

```{r convert-dates}
# Inspect the column types of each data frame.
str(daily_activity1)
str(daily_activity2)
str(sleep)

# Rename each date column to `date` and parse it using the month/day/year
# format string. as_date() is namespaced so this chunk works whether or not
# lubridate is attached by library(tidyverse).
daily_activity1 <- daily_activity1 %>%
  rename(date = ActivityDate) %>%
  mutate(date = lubridate::as_date(date, format = "%m/%d/%Y"))

daily_activity2 <- daily_activity2 %>%
  rename(date = ActivityDate) %>%
  mutate(date = lubridate::as_date(date, format = "%m/%d/%Y"))

sleep <- sleep %>%
  rename(date = SleepDay) %>%
  mutate(date = lubridate::as_date(date, format = "%m/%d/%Y"))
```

#summarize and analyze

```{r activity-summary}
# Summary statistics for steps, distance and calories in each activity set.
daily_activity1 %>%
  select(TotalSteps, TotalDistance, Calories) %>%
  summary()

daily_activity2 %>%
  select(TotalSteps, TotalDistance, Calories) %>%
  summary()

# Summary statistics for the minutes-per-intensity columns.
daily_activity1 %>%
  select(
    VeryActiveMinutes, FairlyActiveMinutes,
    LightlyActiveMinutes, SedentaryMinutes
  ) %>%
  summary()

daily_activity2 %>%
  select(
    VeryActiveMinutes, FairlyActiveMinutes,
    LightlyActiveMinutes, SedentaryMinutes
  ) %>%
  summary()
```

```{r sleep-summary}
# Summary statistics for the sleep columns.
sleep %>%
  select(TotalSleepRecords, TotalTimeInBed, TotalMinutesAsleep) %>%
  summary()
```

```{r view-data, eval=FALSE}
# Open the data viewer on activity_sleep.
# eval=FALSE: view() is meant for interactive sessions rather than a knit.
# NOTE(review): `activity_sleep` is not created anywhere in the visible part
# of this document -- confirm which data frame was intended.
view(activity_sleep)
```

```{r visualization}
# Data visualization

# Scatter plots: total steps against sedentary minutes for each activity set,
# and minutes asleep against time in bed for the sleep data.
ggplot(data = daily_activity1, aes(x = TotalSteps, y = SedentaryMinutes)) +
  geom_point()

ggplot(data = daily_activity2, aes(x = TotalSteps, y = SedentaryMinutes)) +
  geom_point()

ggplot(data = sleep, aes(x = TotalMinutesAsleep, y = TotalTimeInBed)) +
  geom_point()

# Join the sleep records to the second activity set on Id.
# NOTE(review): joining on Id alone pairs every sleep row with every activity
# row that shares the Id -- confirm whether the date should also be a key.
combined_data <- merge(sleep, daily_activity2, by = "Id")

# Line charts

# Compare the two activity data sets (fixed the misspelled object name
# `daily_acivity2`, which does not exist).
comparedf(daily_activity1, daily_activity2)
```

## Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.