R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary statistics (min, quartiles, median, mean, max) of `cars`.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

# Packages used by the analysis below. Each call must be its own statement;
# several `library()` calls on one line without separators do not parse.
library(tidyverse)
library(lubridate)
library(nycflights13)
library(readr)
library(tidytext)
library(stopwords)

# Number of flights per month that arrived more than 5 minutes late.
# `filter()` drops rows whose `arr_delay` is NA, so those flights are not
# counted. The result is an ungrouped tibble with columns `month` and
# `lateflights`.
lateflights <- flights %>%
  filter(arr_delay > 5) %>%
  count(month, name = "lateflights")

# Print the monthly late-arrival counts.
lateflights

# Carrier share of monthly traffic ----

# Flights per (month, carrier). `.groups = "drop_last"` states explicitly what
# summarise() otherwise does with a message: the result stays grouped by month.
total_flights_per_carrier <- flights %>%
  group_by(month, carrier) %>%
  summarise(total_flights_by_carrier = n(), .groups = "drop_last")

# Flights per month across all carriers.
total_flights_per_month <- flights %>%
  count(month, name = "total_flights_per_month")

# Each carrier's share of its month's flights, formatted as a percent string.
# The month grouping is still active during mutate(), so scales::percent()
# is applied month by month; grouping is dropped only afterwards.
percentage_traffic_per_carrier <- total_flights_per_carrier %>%
  left_join(total_flights_per_month, by = "month") %>%
  mutate(
    percentage_traffic = scales::percent(
      total_flights_by_carrier / total_flights_per_month
    )
  ) %>%
  ungroup() %>%
  select(month, carrier, percentage_traffic)

# One row per carrier, one column per month. Carriers with no flights in a
# month get NA. Columns and rows are sorted explicitly so the layout does not
# depend on the order of the input rows.
percentage_traffic_per_carrier %>%
  pivot_wider(
    names_from = month,
    values_from = percentage_traffic,
    names_sort = TRUE
  ) %>%
  arrange(carrier)

# Longest departure delay per month ----

# Keep the flight(s) with the largest departure delay in each month (ties are
# kept; flights with a missing `dep_delay` never match), ordered by month.
flights %>%
  group_by(month) %>%
  filter(dep_delay == max(dep_delay, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(month)

# Usefulness of learning platforms ----

multipleChoiceResponses <- read_csv("/cloud/project/multipleChoiceResponses1.csv")

# Long format: one row per non-missing answer, with the
# "LearningPlatformUsefulness" prefix stripped from the platform name.
usefulness_by_platform <- multipleChoiceResponses %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  )

# Number of answers per platform and usefulness rating.
usefulness_by_platform %>%
  count(learning_platform, usefulness)

# Total answers per platform.
total_usefulness_by_platform <- usefulness_by_platform %>%
  count(learning_platform, name = "tot")

# Answers per platform whose rating does not contain "Not Useful"
# (case-insensitive).
usefulness_count <- usefulness_by_platform %>%
  filter(!grepl("Not Useful", usefulness, ignore.case = TRUE)) %>%
  count(learning_platform, name = "count")

# Share of answers per platform that are not "Not Useful", rounded to three
# decimals. A platform with no such answers is absent from `usefulness_count`
# and therefore from this table.
perc_usefulness <- usefulness_count %>%
  left_join(total_usefulness_by_platform, by = "learning_platform") %>%
  mutate(perc_usefulness = round(count / tot, digits = 3))

perc_usefulness

# Order platforms from highest to lowest share for plotting, then move
# "Courses" to the front.
perc_usefulness_fct <- perc_usefulness %>%
  mutate(
    learning_platform = fct(learning_platform) %>%
      fct_reorder(perc_usefulness) %>%
      fct_rev() %>%
      fct_relevel("Courses", after = 0)
  )

# Lollipop chart: a segment from 0 up to each platform's share, with a point
# at the top.
perc_usefulness_fct %>%
  ggplot(aes(x = learning_platform, y = perc_usefulness)) +
  geom_segment(aes(xend = learning_platform, yend = 0)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Learning Platform",
    y = "Percent finding at least somewhat useful"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 100, suffix = "%")
  )

# Twitter data ----

twitter_data <- readRDS("twitter_data.rds")

# Follower-count statistics per complaint label, ignoring missing counts.
twitter_data %>%
  group_by(complaint_label) %>%
  summarise(
    avg_followers = mean(usr_followers_count, na.rm = TRUE),
    min_followers = min(usr_followers_count, na.rm = TRUE),
    max_followers = max(usr_followers_count, na.rm = TRUE)
  )

# Rename the tweet column to `text`.
twitter_data <- twitter_data %>%
  rename(text = tweet_text)

# Tokenise the `text` column of `twitter_data` into a `word` column, then
# count occurrences of each word, most frequent first.
twitter_data %>%
  unnest_tokens(output = word, input = text) %>%
  count(word, sort = TRUE)

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.