This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the `cars` data set.
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# ---- NYC flights analysis ----

# Install nycflights13 only when it is missing, so re-running or knitting the
# document does not download the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}

# Attach the packages used in this section: dplyr supplies the verbs and the
# %>% pipe, nycflights13 supplies the `flights` data set.
library(dplyr)
library(nycflights13)

data(flights)

# Number of flights per month that arrived more than 5 minutes late.
# filter() keeps only rows where the condition is TRUE, so flights with a
# missing arr_delay are not counted.
late_flights <- flights %>%
  mutate(late_arrival = arr_delay > 5) %>%
  filter(late_arrival) %>%
  group_by(month) %>%
  summarize(lateflights = n(), .groups = "drop")
print(late_flights)

# Share (in percent) of each month's flights operated by each carrier.
traffic_percentage <- flights %>%
  group_by(month, carrier) %>%
  summarize(total_flights = n(), .groups = "drop") %>%
  group_by(month) %>%
  mutate(traffic_percent = (total_flights / sum(total_flights)) * 100) %>%
  ungroup()
print(traffic_percentage)

# For every month, the flight(s) with the largest dep_time below 2400.
# Rows with a missing dep_time or a value of 2400 or more are excluded up
# front; ties within a month are all kept.
# NOTE(review): dep_time is presumably an HHMM clock value - confirm against
# the data set documentation.
latest_flights <- flights %>%
  filter(dep_time < 2400) %>%
  group_by(month) %>%
  filter(dep_time == max(dep_time)) %>%
  ungroup() %>%
  select(
    year, month, day, dep_time, sched_dep_time, dep_delay,
    arr_time, sched_arr_time, carrier
  )
print(latest_flights)
# ---- Learning platform usefulness (survey responses) ----

# Attach the packages used in this section: dplyr (verbs, %>%),
# tidyr (pivot_longer, drop_na) and stringr (str_remove).
library(dplyr)
library(tidyr)
library(stringr)

responses <- read.csv("/mnt/data/multipleChoiceResponses1.csv")

# Reshape every LearningPlatformUsefulness* column into
# (learning_platform, usefulness) pairs and count each combination.
usefulness_count <- responses %>%
  pivot_longer(
    cols = starts_with("LearningPlatformUsefulness"),
    names_to = "learning_platform",
    values_to = "usefulness"
  ) %>%
  drop_na(usefulness) %>%
  # read.csv() keeps blank cells in character columns as "" rather than NA,
  # so unanswered questions have to be removed explicitly as well.
  filter(usefulness != "") %>%
  mutate(
    learning_platform = str_remove(
      learning_platform,
      "LearningPlatformUsefulness"
    )
  ) %>%
  count(learning_platform, usefulness)
print(usefulness_count)

# Per platform: total number of answers (tot), number of answers other than
# "Not Useful" (count), and the resulting fraction (perc_usefulness).
usefulness_summary <- usefulness_count %>%
  group_by(learning_platform) %>%
  summarize(
    tot = sum(n),
    count = sum(ifelse(usefulness != "Not Useful", n, 0)),
    perc_usefulness = count / tot
  )
print(usefulness_summary)

# ---- Twitter data analysis ----

# Attach the packages used in this section: dplyr (verbs, %>%) and
# tidytext (unnest_tokens).
library(dplyr)
library(tidytext)

# Load twitter data
twitter_data <- readRDS("/mnt/data/twitter_data.rds")

# Mean, minimum and maximum follower count for each complaint_label,
# ignoring missing follower counts.
followers_stats <- twitter_data %>%
  group_by(complaint_label) %>%
  summarize(
    avg_followers = mean(followers_count, na.rm = TRUE),
    min_followers = min(followers_count, na.rm = TRUE),
    max_followers = max(followers_count, na.rm = TRUE)
  )
print(followers_stats)

# Split the `text` column into one token per row (column `word`) and count
# how often each token occurs, most frequent first.
word_counts <- twitter_data %>%
  unnest_tokens(word, text) %>%
  count(word, sort = TRUE)
print(word_counts)