# Keep only the columns used for the per-manufacturer fuel-economy comparison.
manufacturer_mpg <- mpg |>
  select(manufacturer, cty, hwy, class)

# Print the SUV rows as a quick preview; the result is not stored here.
manufacturer_mpg |>
  filter(class == "suv")
## # A tibble: 62 × 4
## manufacturer cty hwy class
## <chr> <int> <int> <chr>
## 1 chevrolet 14 20 suv
## 2 chevrolet 11 15 suv
## 3 chevrolet 14 20 suv
## 4 chevrolet 13 17 suv
## 5 chevrolet 12 17 suv
## 6 chevrolet 14 19 suv
## 7 chevrolet 11 14 suv
## 8 chevrolet 11 15 suv
## 9 chevrolet 14 17 suv
## 10 dodge 13 17 suv
## # ℹ 52 more rows
# SUV-only subset of the manufacturer/mileage columns.
suv_mpg <- manufacturer_mpg |>
  filter(class == "suv")

# Box plot of the mean of city and highway mileage for each manufacturer.
# stat_boxplot(geom = "errorbar") is drawn first so the whisker caps sit
# underneath the boxes.
ggplot(suv_mpg, aes(x = manufacturer, y = (cty + hwy) / 2)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot() +
  labs(
    title = "SUV Fuel Economy by Manufacturers",
    x = "Manufacturer",
    y = "Average Combined MPG"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = rel(1.5), face = "bold"),
    axis.title = element_text(size = rel(1.2), face = "bold")
  )
Answer: Subaru produced the most fuel-efficient
SUVs.
# Same columns as before plus the model year, for the year-over-year view.
year_mpg <- mpg |>
  select(manufacturer, cty, hwy, class, year)

# Print the SUV rows as a quick preview; the result is not stored here.
year_mpg |>
  filter(class == "suv")
## # A tibble: 62 × 5
## manufacturer cty hwy class year
## <chr> <int> <int> <chr> <int>
## 1 chevrolet 14 20 suv 2008
## 2 chevrolet 11 15 suv 2008
## 3 chevrolet 14 20 suv 2008
## 4 chevrolet 13 17 suv 1999
## 5 chevrolet 12 17 suv 2008
## 6 chevrolet 14 19 suv 2008
## 7 chevrolet 11 14 suv 2008
## 8 chevrolet 11 15 suv 1999
## 9 chevrolet 14 17 suv 1999
## 10 dodge 13 17 suv 1999
## # ℹ 52 more rows
# SUV-only subset that retains the model year.
year_suv_mpg <- year_mpg |>
  filter(class == "suv")

# Box plot of the mean of city and highway mileage per manufacturer, with one
# box per model year. year is wrapped in factor() so it is treated as a
# discrete colour rather than a continuous scale.
ggplot(
  year_suv_mpg,
  aes(x = manufacturer, y = (cty + hwy) / 2, color = factor(year))
) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot() +
  labs(
    title = "Manufacturers SUV Fuel Economy by Year",
    x = "Manufacturer",
    y = "Average Combined MPG",
    color = "Year"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = rel(1.5), face = "bold"),
    axis.title = element_text(size = rel(1.2), face = "bold")
  )
Answer: Land Rover improved fuel economy the most
between 1999 and 2008.
# Split flights into those delayed by at least 120 minutes on departure or
# arrival, and those delayed by less than 120 minutes on both.
# filter() drops rows whose condition evaluates to NA, so flights with missing
# delay values can be absent from both groups.
long_delay <- filter(flights, dep_delay >= 120 | arr_delay >= 120)
others <- filter(flights, dep_delay < 120 & arr_delay < 120)

# Theme shared by both monthly bar charts so they stay directly comparable.
delay_theme <- theme(
  plot.title = element_text(
    hjust = 0.5,
    size = rel(1.5),
    margin = margin(15, 15, 15, 15)
  ),
  axis.title = element_text(size = rel(1.4)),
  axis.title.x = element_text(margin = margin(10, 5, 5, 5)),
  axis.title.y = element_text(margin = margin(5, 10, 5, 5)),
  axis.text = element_text(size = rel(1.4))
)

# Monthly distribution of long-delay flights.
# after_stat(count / sum(count)) turns the bar heights into relative
# frequencies instead of raw counts.
# The x axis shows factor(month), so it is labelled "Month" (it was previously
# mislabelled "Carrier").
ggplot(data = long_delay) +
  geom_bar(aes(x = factor(month), y = after_stat(count / sum(count)))) +
  labs(title = "Long Delay Flights", x = "Month", y = "Relative Frequency") +
  delay_theme

# Monthly distribution of the remaining (short- or no-delay) flights, drawn
# with the same scaling and theme as the chart above.
ggplot(data = others) +
  geom_bar(aes(x = factor(month), y = after_stat(count / sum(count)))) +
  labs(
    title = "Short or No Delay Flights",
    x = "Month",
    y = "Relative Frequency"
  ) +
  delay_theme
Answer: The month also correlates with long-delay flights,
probably due to seasonal travel demand and weather. June and
July have the highest number of long-delay flights, while short-delay
flights remain roughly the same throughout the year.