library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# NOTE(review): tidyverse is already attached at the top of this file, and it
# attaches ggplot2 itself, so these two calls are redundant (but harmless).
library(tidyverse)
library(ggplot2)
# Load the nycflights data set into the workspace
# (presumably provided by openintro -- confirm).
data(nycflights)

Exercise 1

R Markdown

# List the column names available in the flights data.
colnames(nycflights)
##  [1] "year"      "month"     "day"       "dep_time"  "dep_delay" "arr_time" 
##  [7] "arr_delay" "carrier"   "tailnum"   "flight"    "origin"    "dest"     
## [13] "air_time"  "distance"  "hour"      "minute"
# Histogram of departure delays using ggplot2's default binning.
nycflights |>
  ggplot(aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Same histogram with a binwidth of 15.
nycflights |>
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 15)

# Same histogram with a binwidth of 150.
nycflights |>
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 150)

The first two histograms show much more detail than the last one. The one with a binwidth of 150 obscures the details.

# Keep only the flights whose destination is LAX. The column is referenced by
# its bare name (data masking) rather than as `nycflights$dest`: the `$` form
# bypasses dplyr's data mask and gives wrong results on grouped or previously
# transformed data.
lax_flights <- filter(nycflights, dest == "LAX")

# Distribution of departure delays for the LAX-bound flights.
ggplot(data = lax_flights, aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Subset the LAX-bound flights, then plot the distribution of their
# departure delays.
lax_flights <- filter(nycflights, dest == "LAX")
lax_flights |>
  ggplot(aes(x = dep_delay)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

  # Mean, median and row count of departure delays for the LAX-bound flights.
  summarise(
    lax_flights,
    mean_dd = mean(dep_delay),
    median_dd = median(dep_delay),
    n = n()
  )
## # A tibble: 1 × 3
##   mean_dd median_dd     n
##     <dbl>     <dbl> <int>
## 1    9.78        -1  1583
# Flights headed to SFO with month equal to 2 (February).
sfo_feb_flights <- filter(nycflights, dest == "SFO" & month == 2)

68 flights meet these criteria (destination SFO, in February).

# Distribution of arrival delays for the LAX-bound flights.
# NOTE(review): `sfo_feb_flights` is created just before this point but is
# never used; confirm whether this plot and summary were meant to describe it
# instead of `lax_flights`.
ggplot(data = lax_flights, aes(x = arr_delay)) +
  geom_histogram()

# Centre, spread and row count of the arrival delays. The columns use the
# `_ad` (arrival delay) suffix throughout; the previous names reused `_dd`
# and misspelled "median".
lax_flights |>
  summarise(
    mean_ad = mean(arr_delay),
    median_ad = median(arr_delay),
    IQR_ad = IQR(arr_delay),
    n = n()
  )

3

# Label every flight by its departure delay: values below 5 are "on time",
# everything else is "delayed".
nycflights <- mutate(
  nycflights,
  dep_type = ifelse(dep_delay >= 5, "delayed", "on time")
)

# Proportion of "on time" departures for each origin airport, sorted from the
# highest rate to the lowest.
nycflights |>
  group_by(origin) |>
  summarise(
    ot_dep_rate = sum(dep_type == "on time") / n()
  ) |>
  arrange(desc(ot_dep_rate))
## # A tibble: 3 × 2
##   origin ot_dep_rate
##   <chr>        <dbl>
## 1 LGA          0.728
## 2 JFK          0.694
## 3 EWR          0.637
 # Bar chart of flight counts per origin, with bars filled by dep_type.
 nycflights |>
   ggplot(aes(x = origin, fill = dep_type)) +
   geom_bar()

# Average speed: distance divided by air time expressed in hours
# (air_time / 60 -- presumably air_time is recorded in minutes; confirm).
nycflights <- mutate(nycflights, avg_speed = distance / (air_time / 60))

# Scatter plot with average speed on the x axis and distance on the y axis.
nycflights |>
  ggplot(aes(x = avg_speed, y = distance)) +
  geom_point()

There is an upward trend: as the average speed increases, so does the distance. I am excluding the outlier in the bottom right.

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ###. ᕙ(▀̿̿Ĺ̯̿̿▀̿ ̿)ᕗ. ( ͠°͟ʖ ͠°)

# Per-column summary statistics of the `cars` data
# (appears to be the example left over from the R Markdown template).
cars |>
  summary()
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.