library(tidyverse)
library(lubridate)
library(nycflights13)
library(readr)
library(tidytext)
library(stopwords)
# Monthly tally of flights that arrived more than five minutes late.
# count() groups by month, counts the rows and returns an ungrouped tibble.
lateflights <- flights %>%
  filter(arr_delay > 5) %>%
  count(month, name = "lateflights")

lateflights
# Number of flights operated by each carrier in each month.
# `.groups = "drop_last"` spells out what summarise() would otherwise do
# implicitly (and announce with a message): the carrier grouping is dropped
# and the result stays grouped by month.
total_flights_per_carrier <- flights %>%
  group_by(month, carrier) %>%
  summarise(total_flights_by_carrier = n(), .groups = "drop_last")
`summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
# Total number of flights in each month (denominator of the traffic share).
total_flights_per_month <- flights %>%
  count(month, name = "total_flights_per_month")

# Each carrier's share of its month's flights, formatted as a percentage.
# The table is ungrouped before formatting and the accuracy is fixed, so
# every month is printed with the same number of decimals; left to itself,
# scales::percent() derives the precision from the values of each call, and
# a month-grouped mutate() calls it once per month.
percentage_traffic_per_carrier <- total_flights_per_carrier %>%
  ungroup() %>%
  left_join(total_flights_per_month, by = "month") %>%
  mutate(
    percentage_traffic = scales::percent(
      total_flights_by_carrier / total_flights_per_month,
      accuracy = 0.01
    )
  ) %>%
  select(month, carrier, percentage_traffic)

# Wide view: one row per carrier, one column per month. Carrier/month
# combinations with no flights are left as NA. Rows are sorted by carrier
# because pivot_wider() keeps rows in order of first appearance.
percentage_traffic_per_carrier %>%
  pivot_wider(names_from = month, values_from = percentage_traffic) %>%
  arrange(carrier)
# For every month, keep the flight(s) whose departure delay equals that
# month's maximum (missing delays are ignored when taking the maximum),
# listed in ascending month order.
flights %>%
  group_by(month) %>%
  filter(dep_delay == max(dep_delay, na.rm = TRUE)) %>%
  arrange(month)
# Load the multiple-choice responses CSV from the project directory.
multipleChoiceResponses <- read_csv(
  file = "/cloud/project/multipleChoiceResponses1.csv"
)
Rows: 16716 Columns: 47
── Column specification ────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (46): LearningPlatformUsefulnessArxiv, LearningPlatformUsefulnessBlogs, LearningPlat...
dbl  (1): Age

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Long table with one row per non-missing answer: `learning_platform` is the
# source column name with its "LearningPlatformUsefulness" prefix removed,
# `usefulness` is the answer that was given.
usefulness_by_platform <- multipleChoiceResponses %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  rename_with(str_remove, pattern = "LearningPlatformUsefulness") %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE,
    cols_vary = "slowest" # stack column by column, the row order gather() gave
  )

# Number of answers for every platform / usefulness combination.
usefulness_by_platform %>%
  count(learning_platform, usefulness)
`summarise()` has grouped output by 'learning_platform'. You can override using the
`.groups` argument.
# Number of answers recorded for each platform.
total_usefulness_by_platform <- usefulness_by_platform %>%
  count(learning_platform, name = "tot")

# Number of answers per platform that do not contain "Not Useful"
# (case-insensitive). Counting inside summarise() rather than filtering rows
# first keeps a platform with no such answers in the table with a count of 0
# instead of silently dropping it.
usefulness_count <- usefulness_by_platform %>%
  group_by(learning_platform) %>%
  summarise(count = sum(!grepl("Not Useful", usefulness, ignore.case = TRUE)))

# Share of each platform's answers that are not "Not Useful", rounded to
# three decimals.
perc_usefulness <- usefulness_count %>%
  left_join(total_usefulness_by_platform, by = "learning_platform") %>%
  mutate(perc_usefulness = round(count / tot, digits = 3))

perc_usefulness
# Turn the platform name into a factor whose levels run from the highest to
# the lowest share, then move "Courses" to the first position.
perc_usefulness_fct <- perc_usefulness %>%
  mutate(
    learning_platform = fct_relevel(
      fct_rev(fct_reorder(fct(learning_platform), perc_usefulness)),
      "Courses",
      after = 0
    )
  )

# Lollipop chart: a segment from zero up to each platform's share with a
# point on top; the y axis is labelled in percent.
ggplot(
  perc_usefulness_fct,
  aes(x = learning_platform, y = perc_usefulness)
) +
  geom_segment(aes(xend = learning_platform, yend = 0)) +
  geom_point() +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Learning Platform",
    y = "Percent finding at least somewhat useful"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Load the tweet data set from the working directory.
twitter_data <- readRDS(file = "twitter_data.rds")

# Mean, minimum and maximum follower count for each complaint label,
# ignoring missing follower counts.
summarise(
  group_by(twitter_data, complaint_label),
  avg_followers = mean(usr_followers_count, na.rm = TRUE),
  min_followers = min(usr_followers_count, na.rm = TRUE),
  max_followers = max(usr_followers_count, na.rm = TRUE)
)

# List the columns that are available.
names(twitter_data)
[1] "tweet_id"            "date"                "complaint_label"     "tweet_text"         
[5] "usr_followers_count" "usr_verified"       
# Rename `tweet_text` to `text`, the column that is tokenised below.
# any_of() makes the rename a no-op when the column has already been
# renamed, so this section can be re-run in the same session.
twitter_data <- twitter_data %>%
  rename(any_of(c(text = "tweet_text")))

# Split every tweet into one word per row once and reuse the result, instead
# of tokenising the full data set again for every table.
tweet_words <- twitter_data %>%
  unnest_tokens(word, text)

# Word frequencies, most frequent first.
tweet_words %>%
  count(word, sort = TRUE)

# The same frequencies after dropping the words listed in `stop_words`.
tweet_words %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkobnljZmxpZ2h0czEzKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHN0b3B3b3JkcykKbGF0ZWZsaWdodHMgPC0gZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKGFycl9kZWxheT41KSAlPiUgCiAgZ3JvdXBfYnkobW9udGgpICU+JSAKICBzdW1tYXJpc2UobGF0ZWZsaWdodHMgPSBuKCkpICU+JSAKICB1bmdyb3VwKCkKCmxhdGVmbGlnaHRzCnRvdGFsX2ZsaWdodHNfcGVyX2NhcnJpZXIgPC0gZmxpZ2h0cyAlPiUKICBncm91cF9ieShtb250aCwgY2FycmllcikgJT4lCiAgc3VtbWFyaXNlKHRvdGFsX2ZsaWdodHNfYnlfY2FycmllciA9IG4oKSkKdG90YWxfZmxpZ2h0c19wZXJfbW9udGggPC0gZmxpZ2h0cyAlPiUKICBncm91cF9ieShtb250aCkgJT4lCiAgc3VtbWFyaXNlKHRvdGFsX2ZsaWdodHNfcGVyX21vbnRoID0gbigpKQpwZXJjZW50YWdlX3RyYWZmaWNfcGVyX2NhcnJpZXIgPC0gdG90YWxfZmxpZ2h0c19wZXJfY2FycmllciAlPiUKICBsZWZ0X2pvaW4odG90YWxfZmxpZ2h0c19wZXJfbW9udGgsIGJ5ID0gIm1vbnRoIikgJT4lCiAgbXV0YXRlKHBlcmNlbnRhZ2VfdHJhZmZpYyA9ICh0b3RhbF9mbGlnaHRzX2J5X2NhcnJpZXIgLyB0b3RhbF9mbGlnaHRzX3Blcl9tb250aCkpICU+JQogIG11dGF0ZShwZXJjZW50YWdlX3RyYWZmaWMgPSBzY2FsZXM6OnBlcmNlbnQocGVyY2VudGFnZV90cmFmZmljKSkgJT4lIAogIHNlbGVjdChtb250aCwgY2FycmllciwgcGVyY2VudGFnZV90cmFmZmljKQpwZXJjZW50YWdlX3RyYWZmaWNfcGVyX2NhcnJpZXIgJT4lIHNwcmVhZChrZXkgPSBtb250aCwgdmFsdWUgPSBwZXJjZW50YWdlX3RyYWZmaWMpCmZsaWdodHMgJT4lCiAgZ3JvdXBfYnkobW9udGgpICU+JQogIGZpbHRlcihkZXBfZGVsYXkgPT0gbWF4KGRlcF9kZWxheSwgbmEucm0gPSBUUlVFKSkgJT4lIAogIGFycmFuZ2UoLWRlc2MobW9udGgpKQptdWx0aXBsZUNob2ljZVJlc3BvbnNlcyA8LSByZWFkX2NzdigiL2Nsb3VkL3Byb2plY3QvbXVsdGlwbGVDaG9pY2VSZXNwb25zZXMxLmNzdiIpCnVzZWZ1bG5lc3NfYnlfcGxhdGZvcm0gPC0gbXVsdGlwbGVDaG9pY2VSZXNwb25zZXMgJT4lIHNlbGVjdChzdGFydHNfd2l0aCgiTGVhcm5pbmdQbGF0Zm9ybVVzZWZ1bG5lc3MiKSkgJT4lICAKICBzZXRfbmFtZXMobmFtZXMoLikgJT4lIHN0cl9yZXBsYWNlKCJMZWFybmluZ1BsYXRmb3JtVXNlZnVsbmVzcyIsICIiKSkgJT4lIAogIGdhdGhlcihrZXkgPSAibGVhcm5pbmdfcGxhdGZvcm0iLCB2YWx1ZSA9ICJ1c2VmdWxuZXNzIixjb252ZXJ0ID0gRkFMU0UsIG5hLnJtID0gVFJVRSkKdXNlZnVsbmVzc19ieV9wbGF0Zm9ybSAlPiUgZ3JvdXBfYnkobGVhcm5pbmdfcGxhdGZvcm0sIHVzZWZ1bG5lc3MpICU+JSAKICBzdW1tYXJpc2UobiA9IG4o
KSkgJT4lIAogIHVuZ3JvdXAoKQp0b3RhbF91c2VmdWxuZXNzX2J5X3BsYXRmb3JtIDwtIHVzZWZ1bG5lc3NfYnlfcGxhdGZvcm0gJT4lIGdyb3VwX2J5KGxlYXJuaW5nX3BsYXRmb3JtKSAlPiUgc3VtbWFyaXNlKHRvdCA9IG4oKSkKdXNlZnVsbmVzc19jb3VudCA8LSB1c2VmdWxuZXNzX2J5X3BsYXRmb3JtICU+JSBmaWx0ZXIoIWdyZXBsKCJOb3QgVXNlZnVsIix1c2VmdWxuZXNzLGlnbm9yZS5jYXNlID0gVFJVRSkpICU+JSAKICBncm91cF9ieShsZWFybmluZ19wbGF0Zm9ybSkgJT4lIHN1bW1hcmlzZShjb3VudCA9IG4oKSkKcGVyY191c2VmdWxuZXNzIDwtIHVzZWZ1bG5lc3NfY291bnQgJT4lIGxlZnRfam9pbih0b3RhbF91c2VmdWxuZXNzX2J5X3BsYXRmb3JtLCBieSA9ICJsZWFybmluZ19wbGF0Zm9ybSIpICU+JSAKICBtdXRhdGUocGVyY191c2VmdWxuZXNzID0gY291bnQvdG90KSAlPiUgCiAgbXV0YXRlKHBlcmNfdXNlZnVsbmVzcyA9IHJvdW5kKHBlcmNfdXNlZnVsbmVzcywgZGlnaXRzID0gMykpCnBlcmNfdXNlZnVsbmVzcwpwZXJjX3VzZWZ1bG5lc3NfZmN0IDwtIHBlcmNfdXNlZnVsbmVzcyAlPiUgCiAgbXV0YXRlKGxlYXJuaW5nX3BsYXRmb3JtID0gZmN0KGxlYXJuaW5nX3BsYXRmb3JtKSAlPiUgCiAgICAgICAgICAgZmN0X3Jlb3JkZXIocGVyY191c2VmdWxuZXNzKSAlPiUgCiAgICAgICAgICAgZmN0X3JldigpICU+JSAKICAgICAgICAgICBmY3RfcmVsZXZlbCgiQ291cnNlcyIsIGFmdGVyID0gMCkpCnBlcmNfdXNlZnVsbmVzc19mY3QgJT4lIGdncGxvdChhZXMoeCA9IGxlYXJuaW5nX3BsYXRmb3JtLCB5ID0gcGVyY191c2VmdWxuZXNzKSkrCiAgZ2VvbV9zZWdtZW50KGFlcyh4ZW5kID0gbGVhcm5pbmdfcGxhdGZvcm0sIHllbmQ9MCkpKwogIGdlb21fcG9pbnQoKSsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKSsKICBsYWJzKAogICAgeCA9ICJMZWFybmluZyBQbGF0Zm9ybSIsCiAgICB5ID0gIlBlcmNlbnQgZmluZGluZyBhdCBsZWFzdCBzb21ld2hhdCB1c2VmdWwiCiAgKSsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50X2Zvcm1hdChzY2FsZSA9IDEwMCwgc3VmZml4ID0gIiUiKSkKdHdpdHRlcl9kYXRhIDwtIHJlYWRSRFMoInR3aXR0ZXJfZGF0YS5yZHMiKQp0d2l0dGVyX2RhdGEgJT4lCiAgZ3JvdXBfYnkoY29tcGxhaW50X2xhYmVsKSAlPiUKICBzdW1tYXJpc2UoCiAgICBhdmdfZm9sbG93ZXJzID0gbWVhbih1c3JfZm9sbG93ZXJzX2NvdW50LCBuYS5ybSA9IFRSVUUpLAogICAgbWluX2ZvbGxvd2VycyA9IG1pbih1c3JfZm9sbG93ZXJzX2NvdW50LCBuYS5ybSA9IFRSVUUpLAogICAgbWF4X2ZvbGxvd2VycyA9IG1heCh1c3JfZm9sbG93ZXJzX2NvdW50LCBuYS5ybSA9IFRSVUUpCiAgKQpjb2xuYW1lcyh0d2l0dGVyX2RhdGEpCnR3aXR0ZXJfZGF0YSA8LSB0d2l0dGVyX2RhdGEgJT4lCiAgcmVuYW1lKHRleHQgPSB0
d2VldF90ZXh0KQoKdHdpdHRlcl9kYXRhICU+JQogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFRSVUUpCnR3aXR0ZXJfZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogIGFudGlfam9pbihzdG9wX3dvcmRzLCBieSA9ICJ3b3JkIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFRSVUUpCnR3aXR0ZXJfZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogIGNvdW50KHdvcmQsIHNvcnQgPSBUUlVFKQpgYGAKCgo=