This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 4.3.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# Per month, count the flights whose arrival delay exceeded 5 minutes.
# Flights with a missing arrival delay are not counted (na.rm = TRUE).
late_flights_by_month <- flights %>%
  group_by(month) %>%
  summarise(lateflights = sum(arr_delay > 5, na.rm = TRUE))
late_flights_by_month
## # A tibble: 12 × 2
## month lateflights
## <int> <int>
## 1 1 8988
## 2 2 8119
## 3 3 9033
## 4 4 10544
## 5 5 8490
## 6 6 10739
## 7 7 11518
## 8 8 9649
## 9 9 5347
## 10 10 7628
## 11 11 7485
## 12 12 12291
# Share (in percent) of each month's flights operated by each carrier,
# reshaped to one row per carrier and one column per month.
traffic_percentage <- flights %>%
  count(month, carrier, name = "n_flights") %>%
  group_by(month) %>%
  mutate(percentage = n_flights / sum(n_flights) * 100) %>%
  ungroup() %>%
  select(month, carrier, percentage) %>%
  # A carrier with no flights in a month has a 0% share of that month's
  # traffic, so fill the gaps with 0 instead of leaving them as NA.
  pivot_wider(
    names_from = month,
    values_from = percentage,
    values_fill = 0
  )
print(head(traffic_percentage, 6))
## # A tibble: 6 × 13
## carrier `1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 9E 5.83 5.85 5.64 5.33 5.08 5.09 5.08 4.96 5.58 5.79
## 2 AA 10.3 10.1 9.67 9.61 9.73 9.76 9.79 9.74 9.48 9.40
## 3 AS 0.230 0.224 0.215 0.212 0.215 0.212 0.211 0.211 0.218 0.215
## 4 B6 16.4 16.4 16.5 15.9 15.9 16.4 16.9 16.9 15.6 15.1
## 5 DL 13.7 13.8 14.5 14.4 14.2 14.6 14.4 14.7 14.1 14.2
## 6 EV 15.4 15.3 16.4 16.1 16.7 15.8 15.8 15.6 17.1 17.0
## # ℹ 2 more variables: `11` <dbl>, `12` <dbl>
# For each month, keep the flight(s) with the largest departure delay.
# Rows tied for the monthly maximum are all kept; rows with a missing
# dep_delay never match the comparison and are dropped by filter().
latest_dep <- flights %>%
  group_by(month) %>%
  filter(dep_delay == max(dep_delay, na.rm = TRUE)) %>%
  # Ascending month order; the result stays grouped by month.
  arrange(month)
latest_dep
## # A tibble: 12 × 19
## # Groups: month [12]
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 9 641 900 1301 1242 1530
## 2 2013 2 10 2243 830 853 100 1106
## 3 2013 3 17 2321 810 911 135 1020
## 4 2013 4 10 1100 1900 960 1342 2211
## 5 2013 5 3 1133 2055 878 1250 2215
## 6 2013 6 15 1432 1935 1137 1607 2120
## 7 2013 7 22 845 1600 1005 1044 1815
## 8 2013 8 8 2334 1454 520 120 1710
## 9 2013 9 20 1139 1845 1014 1457 2210
## 10 2013 10 14 2042 900 702 2255 1127
## 11 2013 11 3 603 1645 798 829 1913
## 12 2013 12 5 756 1700 896 1058 2020
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
library(tidyverse)
library(lubridate)
library(dplyr)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggplot2)
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.3.2
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
##
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(zoo)
# Read the multiple-choice responses CSV into a data frame.
# NOTE(review): this is an absolute path into one user's Downloads folder, so
# the document only knits on a machine where the file exists at this location.
blah <- read.csv("C:/Users/Badamkhand/Downloads/multipleChoiceResponses1.csv")
# Number of non-missing responses at each usefulness level, per learning
# platform. Every column whose name starts with "LearningPlatformUsefulness"
# is stacked into long form and the prefix is stripped to leave the platform
# name. The result stays grouped by learning_platform because summarise()
# only peels off the last grouping level.
usefulness_count <- blah %>%
  pivot_longer(
    cols = starts_with("LearningPlatformUsefulness"),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)
  ) %>%
  group_by(learning_platform, usefulness) %>%
  summarise(n = n()) %>%
  arrange(learning_platform, usefulness)
## `summarise()` has grouped output by 'learning_platform'. You can override using
## the `.groups` argument.
# Per learning platform:
#   count           - number of non-missing usefulness responses
#   tot             - how many of those are anything other than "Not Useful"
#   perc_usefulness - tot / count, a proportion between 0 and 1
atleastuseful <- blah %>%
  pivot_longer(
    cols = starts_with("LearningPlatformUsefulness"),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)
  ) %>%
  group_by(learning_platform) %>%
  # Both totals come from a single pass, so a platform whose responses are
  # all "Not Useful" correctly gets tot = 0 (0%) rather than a missing value
  # that would have to be patched up afterwards.
  summarise(
    tot = sum(usefulness != "Not Useful"),
    count = n()
  ) %>%
  mutate(perc_usefulness = tot / count) %>%
  # Hand back a plain data frame so printing and downstream use are unchanged.
  as.data.frame()
atleastuseful
## learning_platform tot count perc_usefulness
## 1 Arxiv 2354 2391 0.9845253
## 2 Blogs 4720 4765 0.9905561
## 3 College 3258 3359 0.9699315
## 4 Communities 1126 1142 0.9859895
## 5 Company 940 981 0.9582059
## 6 Conferences 2063 2182 0.9454629
## 7 Courses 5945 5992 0.9921562
## 8 Documentation 2279 2321 0.9819044
## 9 Friends 1530 1581 0.9677419
## 10 Kaggle 6527 6583 0.9914932
## 11 Newsletters 1033 1089 0.9485767
## 12 Podcasts 1090 1214 0.8978583
## 13 Projects 4755 4794 0.9918648
## 14 SO 5576 5640 0.9886525
## 15 Textbook 4112 4181 0.9834968
## 16 TradeBook 324 333 0.9729730
## 17 Tutoring 1394 1426 0.9775596
## 18 YouTube 5125 5229 0.9801109
# Lollipop chart of perc_usefulness for each learning platform, with the
# platforms ordered by descending perc_usefulness.
atleastuseful %>%
  mutate(
    learning_platform = fct_reorder(
      learning_platform,
      perc_usefulness,
      .desc = TRUE
    )
  ) %>%
  mutate(perc_usefulness = as.numeric(perc_usefulness)) %>%
  ggplot(
    aes(
      x = 0,
      xend = perc_usefulness,
      y = learning_platform,
      yend = learning_platform
    )
  ) +
  # Stem from 0 to the value first, then the dot drawn on top of its end.
  geom_segment(color = "black") +
  geom_point(aes(x = perc_usefulness), color = "black", size = 3) +
  # Flip the axes so the platforms run along the horizontal axis.
  coord_flip() +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Percentage of Usefulness by Learning Platform",
    x = "Percent findings at least somewhat useful",
    y = "Learning platform"
  )
Note that adding the `echo = FALSE` parameter to a code chunk prevents printing of the R code that generated the plot.