library(gapminder)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(gt)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(nycflights13)
#1A It seems as though the countries with the highest GDP per capita have higher life expectancies.
# Scatter plot of life expectancy against GDP per capita for every row of
# gapminder, with points coloured by continent and one overall smoother.
plot1 <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  # Colour is mapped in the point layer only, so geom_smooth() fits a single
  # curve over all continents instead of one curve per continent.
  geom_point(mapping = aes(color = continent)) +
  geom_smooth() +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy",
    title = "The Effect of the GDP Per Capita on the Life Expectancy in the World",
    # Spelling fixed: "Antartica" -> "Antarctica".
    subtitle = "Excluding Antarctica due to lack of human life"
  ) +
  scale_color_discrete(name = "Continents") +
  theme_dark() +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot1)
#1B
# Same plot as plot1, but with GDP per capita on a log10 x axis.
# plot1 already carries the title, dark theme, centred-title theme and the
# "Continents" colour scale, so they are not added again here: re-adding
# scale_color_discrete() is what triggered the
# "Scale for 'colour' is already present" message.
plot2 <- plot1 +
  scale_x_log10("the logarithm of GDP per capita") +
  scale_y_continuous("Life Expectancy")
ggplotly(plot2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#1C The top six countries are in Africa, and the rest are in Europe. The growth in life expectancy of the African countries over the years has been minuscule, and in some cases negative, while the European countries have seen greater growth. Lastly, the African countries’ life expectancies are typically in the 40s to 50s range, while those of the European countries are in the 70s to 80s range.
# Ten countries with the smallest relative change in life expectancy between
# their earliest and latest gapminder observations, formatted as a gt table.
table1 <- gapminder %>%
  group_by(country) %>%
  summarise(
    # order_by = year makes "first" and "last" explicit instead of relying on
    # the incoming row order of the data.
    first_year = first(lifeExp, order_by = year),
    last_year = last(lifeExp, order_by = year),
    # Relative change as a fraction (0.10 means +10%), not a percentage.
    pct_change = (last_year - first_year) / first_year
  ) %>%
  # Ascending order: the smallest (most negative) change comes first.
  arrange(pct_change) %>%
  head(10) %>%
  # The pipe already supplies the data frame. The previous gt(table1) passed
  # `table1` as gt()'s second argument (rowname_col), which raised the
  # "'length(x) = 4 > 1' in coercion to 'logical(1)'" warning and becomes an
  # error on R versions where && rejects length > 1 operands.
  gt() %>%
  tab_header(
    title = md("Countries With The Least Development In Life Expectancy"),
    subtitle = md("Top Ten")
  )
#2 I conclude that clarity alone is not enough to determine the value of a diamond, because the median price of I1 is higher than that of IF even though I1 is considered to be the worst clarity grade. Considering other confounding variables, such as the diamond’s carat size, helps the user understand that the size of the diamond affects the pricing as well. In my opinion, carat size seems to be the confounding variable with the greatest effect compared to clarity.
# Horizontal boxplots of diamond price for each clarity grade.
# Note: this assignment reuses the name plot2 and therefore replaces the
# log-scale gapminder plot stored under that name earlier in the script.
plot2 <- ggplot(data = diamonds) +
  geom_boxplot(mapping = aes(x = price, y = clarity)) +
  labs(
    x = "Price",
    y = "Clarity",
    title = "The Price of a Diamond Based on Clarity"
  ) +
  theme_bw()
# Print the static plot.
plot2
#3 The average delay tends to decrease as the number of flights increases. This may be because a larger sample size gives a more stable average, while for destinations with few flights the average arrival delay can be skewed up or down by a handful of flights.
# One row per destination: how many flights went there and their mean
# arrival delay (flights with a missing arr_delay are ignored in the mean).
nyc_flight_info <- summarise(
  group_by(flights, dest),
  # Despite the name, this is the number of flights, not an average.
  avg_flights = n(),
  avg_delay = mean(arr_delay, na.rm = TRUE)
)
# Print the summary table.
nyc_flight_info
## # A tibble: 105 × 3
## dest avg_flights avg_delay
## <chr> <int> <dbl>
## 1 ABQ 254 4.38
## 2 ACK 265 4.85
## 3 ALB 439 14.4
## 4 ANC 8 -2.5
## 5 ATL 17215 11.3
## 6 AUS 2439 6.02
## 7 AVL 275 8.00
## 8 BDL 443 7.05
## 9 BGR 375 8.03
## 10 BHM 297 16.9
## # … with 95 more rows
# Mean arrival delay per destination against the number of flights to that
# destination, with a smoothed trend line.
plot3 <- ggplot(
  data = nyc_flight_info,
  mapping = aes(x = avg_flights, y = avg_delay)
) +
  geom_point() +
  geom_smooth() +
  labs(
    # avg_flights is computed with n(), i.e. a count, so label it as one.
    x = "Number of Flights",
    y = "Average Arrival Delay",
    title = "New York Airports' Average Arrival Delay Per Destination"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
# The yellow panel background is applied only to the interactive version;
# plot3 itself is left unchanged. The stray trailing comma inside
# element_rect() has been removed.
ggplotly(
  plot3 +
    theme(panel.background = element_rect(fill = "yellow", colour = "yellow"))
)
#4A According to the smooth line, the user can conclude that as the length of the flight increases, so does the speed of the flight.
# Per-flight speed and distance, one row per row of flights.
# Fix: speed was previously distance / hour, but per the nycflights13
# documentation `hour` is the scheduled departure hour of day, not how long
# the flight lasted. Duration is `air_time` (minutes), so dividing it by 60
# gives hours and Speed is distance per hour in the air.
# Speed is NA where air_time is missing.
# NOTE(review): any captured output that follows was produced with the old
# formula and needs to be regenerated.
# transmute() keeps only the listed columns and returns one row per input
# row; summarise() returning many rows per group is deprecated in newer
# dplyr releases.
nyc_flight_info_2 <- flights %>%
  transmute(
    Speed = distance / (air_time / 60),
    Distance = distance
  )
nyc_flight_info_2
## # A tibble: 336,776 × 2
## Speed Distance
## <dbl> <dbl>
## 1 280 1400
## 2 283. 1416
## 3 218. 1089
## 4 315. 1576
## 5 127 762
## 6 144. 719
## 7 178. 1065
## 8 38.2 229
## 9 157. 944
## 10 122. 733
## # … with 336,766 more rows
# Flight speed against flight distance, one point per flight, with a
# smoothed trend line.
plot4 <- ggplot(
  data = nyc_flight_info_2,
  mapping = aes(x = Distance, y = Speed)
) +
  geom_point() +
  geom_smooth() +
  # Title corrected: the x axis is Distance (the old title said "Size") and
  # the double possessive "New York's Flight's" is removed.
  ggtitle("New York Flights' Speed According To Distance") +
  theme_bw()
# Render as an interactive plotly widget.
ggplotly(plot4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'