Please indicate
Use the mlb_teams.csv data set to create an informative data graphic that illustrates the relationship between winning percentage (WPct) and payroll in context.
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Pull the MLB team table straight from the course data repository.
mlb_data <- "https://raw.githubusercontent.com/cmsc205/data/master/mlb_teams.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## yearID = col_integer(),
## teamID = col_character(),
## lgID = col_character(),
## W = col_integer(),
## L = col_integer(),
## WPct = col_double(),
## attendance = col_integer(),
## normAttend = col_double(),
## payroll = col_integer(),
## metroPop = col_integer(),
## name = col_character()
## )
library(ggplot2)
# Scatter plot of winning percentage against payroll; payroll is divided by
# one million so the x-axis reads in millions of dollars.
WPctplot <- ggplot(mlb_data, aes(x = payroll / 1e6, y = WPct)) +
  geom_point(size = 1.75, colour = "forestgreen") +
  ggtitle(
    label = "Winning percentage of MLB teams",
    subtitle = "based on payroll between 2008 and 2014"
  ) +
  xlab("Payroll (in millions of dollars)") +
  ylab("Winning Percentage")

# Display the finished plot.
WPctplot
Using data from the nasaweather R package, use the path geometry (i.e. use a geom_path layer) to plot the path of each tropical storm in the storms data table. Use color to distinguish the storms from one another, and use faceting to plot each year in its own panel.
library(nasaweather)
data(storms)
library(ggplot2)
# Draw the track of every tropical storm as a path, one panel per year, with
# colour distinguishing the storms.
# Longitude is mapped to x and latitude to y so the tracks are oriented the
# way they would appear on a map (west-east horizontally, south-north
# vertically) instead of being transposed.
ggplot(data = storms, mapping = aes(x = long, y = lat, color = name)) +
  geom_path() +
  facet_wrap(~ year) +
  labs(
    title = "Paths of Tropical Storms\nfrom 1995 to 2000",
    x = "longitude",
    y = "latitude"
  ) +
  # Keep the latitude axis on the same 0-90 range the plot used before.
  ylim(0, 90)
Hint: Don’t forget to install and load the nasaweather R package!
Using the data set Top25CommonFemaleNames.csv, recreate the “Median Ages for Females with the 25 Most Common Names” graphic from FiveThirtyEight (link to graphic; link to full article).
library(tidyverse)
# Pull the 25-most-common-female-names table from the course data repository.
CommonFNames <- "https://raw.githubusercontent.com/cmsc205/data/master/Top25CommonFemaleNames.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## name = col_character(),
## n = col_integer(),
## est_num_alive = col_double(),
## q1_age = col_integer(),
## median_age = col_integer(),
## q3_age = col_integer()
## )
library(ggplot2)
library(fontcm)
# Age values (in years) used both as axis breaks and as the positions of the
# dotted reference lines.
age_breaks <- seq(15, 75, by = 10)

# Recreation of the FiveThirtyEight graphic: for each name, a bar spanning the
# first to third quartile of age and a point at the median age. Names are
# ordered by median age and the plot is flipped so names run down the side.
CFNplot <- ggplot(
  data = CommonFNames,
  mapping = aes(x = reorder(name, -median_age), y = median_age)
) +
  # Interquartile range of ages, drawn as a thick bar.
  geom_linerange(
    aes(ymin = q1_age, ymax = q3_age),
    size = 5.3,
    colour = "#FFE18D"
  ) +
  # Median age. The constant is mapped inside aes() on purpose so the point
  # gets a legend entry; scale_color_manual() below gives it its real colour
  # and the "median" label. x and y are inherited from the ggplot() call.
  geom_point(aes(colour = "#F30000"), size = 2) +
  theme(
    panel.background = element_blank(),
    legend.position = c(0.7, 0.95),
    legend.background = element_rect(fill = "white"),
    legend.title = element_blank()
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Median Ages For Females With the 25 Most\nCommon Names",
    subtitle = "Among Americans estimated to be alive as of Jan. 1, 2014\n"
  ) +
  scale_color_manual(labels = c("median"), values = c("#F30000")) +
  coord_flip() +
  # Age is numeric, so it needs a continuous scale; a discrete scale with
  # numeric limits is rejected by current ggplot2. Only the first break
  # carries the unit, as in the original graphic. Because of coord_flip(),
  # the y axis is drawn horizontally and position = "right" places it on top.
  scale_y_continuous(
    breaks = age_breaks,
    labels = c("15 Years old", age_breaks[-1]),
    position = "right"
  ) +
  geom_hline(
    yintercept = age_breaks,
    color = "black",
    linetype = "dotted",
    size = 1
  )
CFNplot

# Label the two ends of the interquartile bar. annotate() draws each label
# exactly once; geom_text() with constant aesthetics would draw one copy per
# data row on top of each other.
CFNplot +
  annotate("text", x = 16.1, y = 25.9, label = "< 25th", size = 3.4) +
  annotate("text", x = 16.1, y = 51, label = "75th Percentile >", size = 3.4)