Please indicate
Use the mlb_teams.csv data set to create an informative data graphic that illustrates the relationship between winning percentage (WPct) and payroll in context.
library(ggthemes)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ggplot2)
get data
# Load the MLB team table from the hosted CSV file.
MLB <- read.csv(
  file = "https://raw.githubusercontent.com/cmsc205/data/master/mlb_teams.csv"
)
Set up the plot. I will start with a scatterplot, using geom_point.
# First pass: a bare scatterplot of WPct (x) against payroll rescaled to
# millions (y). No labels or styling yet.
MLBPlot <- ggplot(MLB, aes(WPct, payroll / 1000000)) +
  geom_point()
Now I am going to add labels and titles. Later went back and added color, trendline, and divided payroll by one million to make it look nicer.
# Scatterplot of winning percentage against payroll (in millions of dollars)
# with a smoothed trend line.
#
# The title presents payroll as the explanatory variable, so payroll goes on
# the x-axis and winning percentage on the y-axis; the smoother then models
# winning percentage as a function of payroll.
#
# The original chain ended with `+ theme_update()`. That function modifies the
# session-wide default theme and is not a plot component, so it is dropped;
# the plot simply uses the active default theme.
MLBPlot <- ggplot(data = MLB, aes(x = payroll / 1000000, y = WPct)) +
  geom_point(color = "blue") +
  geom_smooth(color = "black") +
  labs(
    x = "Payroll in Millions of Dollars",
    y = "Winning Percentage, %",
    title = "MLB: How Payroll affects Winning Percentage"
  )
Print the MLB plot
# Render the finished payroll plot.
print(MLBPlot)
## `geom_smooth()` using method = 'loess'
Using data from the nasaweather R package, use the path geometry (i.e. use a geom_path layer) to plot the path of each tropical storm in the storms data table. Use color to distinguish the storms from one another, and use faceting to plot each year in its own panel.
Hint: Don’t forget to install and load the nasaweather R package!
load package
library(nasaweather)
library(mdsr)
## Loading required package: mosaic
## Loading required package: lattice
## Loading required package: mosaicData
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## The 'mosaic' package masks several functions from core packages in order to add additional features.
## The original behavior of these functions should not be affected by this.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following object is masked from 'package:ggthemes':
##
## theme_map
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cov, D, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
need storms
# Copy the storms table into the workspace, taking it explicitly from
# nasaweather. dplyr (attached through tidyverse) also exports a data set
# named `storms`, so a bare `storms` would depend on package attach order.
storms <- nasaweather::storms
Keep only the observations classified as tropical storms
# Subset to the rows whose `type` is exactly "Tropical Storm".
Tropical_Storms <- filter(storms, type == "Tropical Storm")
make plot
# Storm tracks, one panel per year.
# - Longitude is on the x-axis and latitude on the y-axis so that the tracks
#   are oriented like a map.
# - `group = name` gives each storm its own path; without it geom_path joins
#   all observations within a panel into one continuous line.
ggplot(Tropical_Storms, aes(x = long, y = lat, group = name)) +
  geom_path() +
  facet_wrap(~ year, ncol = 2)
add color to distinguish each storm
# Storm tracks coloured by storm name, one panel per year.
# - Longitude is on the x-axis and latitude on the y-axis so that the tracks
#   are oriented like a map; the axis labels match that orientation.
# - Mapping colour to `name` also splits the data into one path per storm.
# - The legend is suppressed, as in the original.
ggplot(Tropical_Storms, aes(x = long, y = lat)) +
  geom_path(aes(color = name), show.legend = FALSE) +
  facet_wrap(~ year, ncol = 2) +
  labs(x = "Longitude", y = "Latitude", title = "Path of Tropical Storms")
Using the data set Top25CommonFemaleNames.csv, recreate the “Median Ages for Females with the 25 Most Common Names” graphic from FiveThirtyEight (link to graphic; link to full article).
read csv
# Load the table of the 25 most common female names from the hosted CSV file.
FemaleNames <- read.csv(
  file = "https://raw.githubusercontent.com/cmsc205/data/master/Top25CommonFemaleNames.csv"
)
Set up the plot, with names on the y-axis and ages (in years) on the x-axis
# Exploratory first draft: median age on x, name on y.
# ymin and ymax are both fixed at 0 here, so the linerange layer has no
# extent yet; the quartile columns are wired in by a later step.
Females <- ggplot(FemaleNames, aes(median_age, name, ymin = 0, ymax = 0)) +
  geom_linerange(color = "yellow") +
  geom_point()
print(Females)
Create Linerange
# Draw each name's age range from q1_age to q3_age as a yellow bar, with a
# point at the median age, then flip the axes so names run down the side.
#
# The original first assigned `factor(FemaleNames, levels = ...)` to `Females`.
# That call passed the whole data frame to factor() and ordered the literal
# string "median_age" rather than the column, and its result was overwritten
# immediately by the plot below. It has been removed.
#
# `color` and `size` are fixed values, so they are set outside aes(). Inside
# aes() they are treated as data, which ignores the requested colour and adds
# a legend.
Females <- ggplot(
  data = FemaleNames,
  aes(x = name, y = median_age, ymin = q1_age, ymax = q3_age)
) +
  geom_linerange(color = "yellow", size = 1) +
  geom_point() +
  coord_flip()
Females
Need to reorder names so that they are in order by median age value
# Same range-and-median plot, with names ordered by median age through
# reorder(name, -median_age), plus a title, subtitle and value-axis label.
#
# The bar width is a fixed value, so `size = 3` is set outside aes(). Inside
# aes() it is treated as data and rescaled by the size scale. With nothing
# mapped on this layer there is no legend to suppress.
Females <- ggplot(
  data = FemaleNames,
  aes(
    x = reorder(name, -median_age),
    y = median_age,
    ymin = q1_age,
    ymax = q3_age
  )
) +
  geom_linerange(color = "goldenrod", size = 3) +
  geom_point(color = "red") +
  coord_flip() +
  labs(
    title = "Median Ages For Females with the 25 Most Common Names",
    subtitle = "Among Americans estimated to be alive as of Jan. 1, 2014",
    x = NULL,
    y = "years old"
  )
Females
editing axes and adding text
# Final version of the graphic: ordered names, goldenrod q1_age-to-q3_age
# bars, red median points, value axis moved to the opposite side with breaks
# every 10 years from 15 to 75, the FiveThirtyEight theme, and hand-placed
# labels.
#
# Changes from the original:
# - `size = 3` is set outside aes(), because it is a fixed width, not data.
# - The explanatory labels and the example "median" point use annotate().
#   geom_text()/geom_point() with constant positions draw one copy per row of
#   FemaleNames, stacking identical marks on top of each other.
#
# The numeric x positions (16 and 22) index into the ordered name axis.
Females <- ggplot(
  data = FemaleNames,
  aes(
    x = reorder(name, -median_age),
    y = median_age,
    ymin = q1_age,
    ymax = q3_age
  )
) +
  geom_linerange(color = "goldenrod", size = 3) +
  geom_point(color = "red") +
  coord_flip() +
  labs(
    title = "Median Ages For Females with the 25 Most\nCommon Names",
    subtitle = "Among Americans estimated to be alive as of Jan. 1, 2014",
    x = NULL,
    y = NULL
  ) +
  scale_y_continuous(breaks = seq(15, 75, 10), position = "right") +
  annotate("text", x = 16, y = 50, label = "75th percentile", size = 3) +
  annotate("text", x = 16, y = 28, label = "25th", size = 3) +
  theme_fivethirtyeight() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(linetype = "dotted")
  ) +
  annotate("point", x = 22, y = 63, color = "red") +
  annotate("text", x = 22, y = 66, label = "median", size = 3)
Females