This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics (min, quartiles, median, mean, max)
# for the `cars` data; the echoed output follows as `##` comment lines.
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
# Packages used throughout the document.
library(tidyverse)
library(lubridate)
library(nycflights13)
library(readr)
library(tidytext)
library(stopwords)

# Count, per month, the flights whose `arr_delay` exceeds 5. Rows with a
# missing `arr_delay` are dropped by filter().
lateflights <- flights %>%
  filter(arr_delay > 5) %>%
  group_by(month) %>%
  summarise(lateflights = n()) %>%
  ungroup()

# Print the monthly late-flight counts.
lateflights

# Number of flights for every month / carrier combination. summarise() drops
# only the last grouping level, so the result is still grouped by `month`;
# that is what triggers the message echoed below.
total_flights_per_carrier <- flights %>%
  group_by(month, carrier) %>%
  summarise(total_flights_by_carrier = n())
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
# Total number of flights in each month.
total_flights_per_month <- flights %>%
  group_by(month) %>%
  summarise(total_flights_per_month = n())

# Share of each month's flights operated by each carrier, formatted as a
# percentage string by scales::percent().
percentage_traffic_per_carrier <- total_flights_per_carrier %>%
  left_join(total_flights_per_month, by = "month") %>%
  mutate(
    percentage_traffic = total_flights_by_carrier / total_flights_per_month
  ) %>%
  mutate(percentage_traffic = scales::percent(percentage_traffic)) %>%
  select(month, carrier, percentage_traffic)

# Reshape to wide form with the months spread across columns.
percentage_traffic_per_carrier %>%
  spread(key = month, value = percentage_traffic)

# For every month, keep the flight(s) whose departure delay equals that
# month's maximum, listed in ascending month order.
# (`arrange(-desc(month))` was a double negation of `arrange(month)`.)
flights %>%
  group_by(month) %>%
  filter(dep_delay == max(dep_delay, na.rm = TRUE)) %>%
  arrange(month)

# Read the multiple-choice responses CSV.
multipleChoiceResponses <-
  read_csv("/cloud/project/multipleChoiceResponses1.csv")
# Keep only the columns whose names start with "LearningPlatformUsefulness",
# strip that prefix from the names, then reshape to long form: one row per
# non-missing (learning_platform, usefulness) value.
usefulness_by_platform <- multipleChoiceResponses %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  set_names(names(.) %>% str_replace("LearningPlatformUsefulness", "")) %>%
  gather(
    key = "learning_platform",
    value = "usefulness",
    convert = FALSE,
    na.rm = TRUE
  )

# Number of responses for each platform / usefulness rating combination.
usefulness_by_platform %>%
  group_by(learning_platform, usefulness) %>%
  summarise(n = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'learning_platform'. You can override
## using the `.groups` argument.

# Total number of flights in each month.
# NOTE(review): this statement repeats one that appears earlier in the
# document; re-running it yields the same object.
total_flights_per_month <- flights %>%
  group_by(month) %>%
  summarise(total_flights_per_month = n())
# NOTE(review): the statements in this chunk repeat ones that appear earlier
# in the document; they are kept so the document's content is unchanged.

# Share of each month's flights operated by each carrier, formatted as a
# percentage string by scales::percent().
percentage_traffic_per_carrier <- total_flights_per_carrier %>%
  left_join(total_flights_per_month, by = "month") %>%
  mutate(
    percentage_traffic = total_flights_by_carrier / total_flights_per_month
  ) %>%
  mutate(percentage_traffic = scales::percent(percentage_traffic)) %>%
  select(month, carrier, percentage_traffic)

# Reshape to wide form with the months spread across columns.
percentage_traffic_per_carrier %>%
  spread(key = month, value = percentage_traffic)

# For every month, keep the flight(s) whose departure delay equals that
# month's maximum, listed in ascending month order.
# (`arrange(-desc(month))` was a double negation of `arrange(month)`.)
flights %>%
  group_by(month) %>%
  filter(dep_delay == max(dep_delay, na.rm = TRUE)) %>%
  arrange(month)

# Read the multiple-choice responses CSV.
multipleChoiceResponses <-
  read_csv("/cloud/project/multipleChoiceResponses1.csv")
# NOTE(review): the first two statements repeat ones that appear earlier in
# the document; they are kept so the document's content is unchanged.

# Keep only the columns whose names start with "LearningPlatformUsefulness",
# strip that prefix from the names, then reshape to long form: one row per
# non-missing (learning_platform, usefulness) value.
usefulness_by_platform <- multipleChoiceResponses %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  set_names(names(.) %>% str_replace("LearningPlatformUsefulness", "")) %>%
  gather(
    key = "learning_platform",
    value = "usefulness",
    convert = FALSE,
    na.rm = TRUE
  )

# Number of responses for each platform / usefulness rating combination.
usefulness_by_platform %>%
  group_by(learning_platform, usefulness) %>%
  summarise(n = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'learning_platform'. You can override
## using the `.groups` argument.

# Total number of (non-missing) responses recorded for each platform.
total_usefulness_by_platform <- usefulness_by_platform %>%
  group_by(learning_platform) %>%
  summarise(tot = n())
# Per platform, count the responses whose rating does not contain
# "Not Useful" (matched case-insensitively). The pattern must stay on one
# line: a line break inside the string literal would become part of the regex.
usefulness_count <- usefulness_by_platform %>%
  filter(!grepl("Not Useful", usefulness, ignore.case = TRUE)) %>%
  group_by(learning_platform) %>%
  summarise(count = n())
# Per platform, the share of responses not rated "Not Useful"
# (count / tot), rounded to three decimals.
perc_usefulness <- usefulness_count %>%
  left_join(total_usefulness_by_platform, by = "learning_platform") %>%
  mutate(perc_usefulness = count / tot) %>%
  mutate(perc_usefulness = round(perc_usefulness, digits = 3))

# Print the per-platform shares.
perc_usefulness

# Turn the platform into a factor ordered by descending share, then move
# "Courses" to the first level regardless of its share.
perc_usefulness_fct <- perc_usefulness %>%
  mutate(
    learning_platform = fct(learning_platform) %>%
      fct_reorder(perc_usefulness) %>%
      fct_rev() %>%
      fct_relevel("Courses", after = 0)
  )

# Lollipop chart: a segment from 0 up to each platform's share, capped with a
# point; x labels are rotated 45 degrees and the y axis is shown in percent.
perc_usefulness_fct %>%
  ggplot(aes(x = learning_platform, y = perc_usefulness)) +
  geom_segment(aes(xend = learning_platform, yend = 0)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Learning Platform",
    y = "Percent finding at least somewhat useful"
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 100, suffix = "%")
  )

# Load the Twitter data from the RDS file.
twitter_data <- readRDS("twitter_data.rds")

# Mean, minimum and maximum follower count per complaint label, ignoring
# missing follower counts.
twitter_data %>%
  group_by(complaint_label) %>%
  summarise(
    avg_followers = mean(usr_followers_count, na.rm = TRUE),
    min_followers = min(usr_followers_count, na.rm = TRUE),
    max_followers = max(usr_followers_count, na.rm = TRUE)
  )

# Rename `tweet_text` to `text` for the statements that follow.
twitter_data <- twitter_data %>%
  rename(text = tweet_text)
# Split the `text` column into one `word` per row, then count how often each
# word occurs, most frequent first.
twitter_data %>%
  unnest_tokens(word, text) %>%
  count(word, sort = TRUE)
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.