# Attach every package used in this walkthrough with a single call.
# `character.only = TRUE` tells library() that each element is a package name
# supplied as a string rather than as an unquoted symbol.
lapply(
  c("ggplot2", "readr", "tidyverse", "RColorBrewer"),
  library,
  character.only = TRUE
)
## [[1]]
## [1] "ggplot2" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## [[2]]
## [1] "readr" "ggplot2" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
##
## [[3]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "tidyr"
## [7] "tibble" "tidyverse" "readr" "ggplot2" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "tidyr" "tibble" "tidyverse" "readr"
## [11] "ggplot2" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
# Read the wide-format mortality table. `check.names = FALSE` keeps the column
# names exactly as they appear in the file instead of rewriting them into
# syntactically valid R names.
mort_dataframe <- read.csv(
  file = "~/indicatordeadkids35.csv",
  check.names = FALSE
)
# readr alternative, kept for reference:
# mort_tibble <- read_csv("./indicatordeadkids35.csv")

# Inspect the column types and the first few values of each column.
str(mort_dataframe)
## 'data.frame': 197 obs. of 255 variables:
## $ : chr "Afghanistan" "Albania" "Algeria" "Angola" ...
## $ 1760: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1761: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1762: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1763: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1764: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1765: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1766: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1767: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1768: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1769: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1770: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1771: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1772: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1773: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1774: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1775: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1776: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1777: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1778: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1779: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1780: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1781: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1782: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1783: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1784: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1785: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1786: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1787: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1788: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1789: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1790: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1791: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1792: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1793: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1794: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1795: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1796: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1797: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1798: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1799: num NA NA NA NA NA NA NA NA NA NA ...
## $ 1800: num NA 2.36 NA 4.44 3.7 ...
## $ 1801: num NA 2.36 NA 4.44 3.7 ...
## $ 1802: num NA 2.36 NA 4.44 3.7 ...
## $ 1803: num NA 2.36 NA 4.44 3.7 ...
## $ 1804: num NA 2.36 NA 4.44 3.7 ...
## $ 1805: num NA 2.36 NA 4.44 3.7 ...
## $ 1806: num NA 2.36 NA 4.44 3.7 ...
## $ 1807: num NA 2.36 NA 4.44 3.7 ...
## $ 1808: num NA 2.36 NA 4.44 3.7 ...
## $ 1809: num NA 2.36 NA 4.44 3.7 ...
## $ 1810: num NA 2.36 NA 4.44 3.7 ...
## $ 1811: num NA 2.36 NA 4.44 3.7 ...
## $ 1812: num NA 2.36 NA 4.45 3.7 ...
## $ 1813: num NA 2.36 NA 4.45 3.7 ...
## $ 1814: num NA 2.36 NA 4.45 3.7 ...
## $ 1815: num NA 2.36 NA 4.45 3.7 ...
## $ 1816: num NA 2.36 NA 4.45 3.7 ...
## $ 1817: num NA 2.36 NA 4.45 3.7 ...
## $ 1818: num NA 2.36 NA 4.45 3.7 ...
## $ 1819: num NA 2.36 NA 4.45 3.7 ...
## $ 1820: num NA 2.36 NA 4.45 3.7 ...
## $ 1821: num NA 2.36 NA 4.45 3.7 ...
## $ 1822: num NA 2.36 NA 4.45 3.7 ...
## $ 1823: num NA 2.36 NA 4.45 3.7 ...
## $ 1824: num NA 2.36 NA 4.45 3.7 ...
## $ 1825: num NA 2.36 NA 4.45 3.7 ...
## $ 1826: num NA 2.36 NA 4.45 3.7 ...
## $ 1827: num NA 2.36 NA 4.45 3.7 ...
## $ 1828: num NA 2.36 NA 4.45 3.7 ...
## $ 1829: num NA 2.36 NA 4.45 3.7 ...
## $ 1830: num NA 2.36 NA 4.45 3.7 ...
## $ 1831: num NA 2.36 NA 4.45 3.7 ...
## $ 1832: num NA 2.36 NA 4.45 3.7 ...
## $ 1833: num NA 2.36 NA 4.46 3.7 ...
## $ 1834: num NA 2.36 NA 4.46 3.7 ...
## $ 1835: num NA 2.36 NA 4.46 3.7 ...
## $ 1836: num NA 2.36 NA 4.46 3.7 ...
## $ 1837: num NA 2.36 NA 4.46 3.7 ...
## $ 1838: num NA 2.36 NA 4.46 3.7 ...
## $ 1839: num NA 2.36 NA 4.46 3.7 ...
## $ 1840: num NA 2.36 NA 4.46 3.7 ...
## $ 1841: num NA 2.36 NA 4.46 3.7 ...
## $ 1842: num NA 2.36 NA 4.46 3.7 ...
## $ 1843: num NA 2.36 NA 4.46 3.7 ...
## $ 1844: num NA 2.36 NA 4.46 3.7 ...
## $ 1845: num NA 2.36 NA 4.46 3.7 ...
## $ 1846: num NA 2.36 NA 4.46 3.7 ...
## $ 1847: num NA 2.36 NA 4.46 3.7 ...
## $ 1848: num NA 2.36 NA 4.46 3.7 ...
## $ 1849: num NA 2.36 NA 4.46 3.7 ...
## $ 1850: num NA 2.36 NA 4.46 3.7 ...
## $ 1851: num NA 2.36 NA 4.46 3.7 ...
## $ 1852: num NA 2.36 NA 4.46 3.7 ...
## $ 1853: num NA 2.36 NA 4.46 3.7 ...
## $ 1854: num NA 2.36 NA 4.46 3.7 ...
## $ 1855: num NA 2.36 NA 4.47 3.7 ...
## $ 1856: num NA 2.36 NA 4.47 3.7 ...
## $ 1857: num NA 2.36 NA 4.47 3.7 ...
## [list output truncated]
# Give the first column a usable name.
colnames(mort_dataframe)[1] <- "country"
# readr alternative, kept for reference:
# rename(mort_tibble, country = "...1")

# Reshape from wide to long: every column except `country` becomes one
# (year, morts) row per country.
long <- mort_dataframe %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    values_to = "morts"
  )
str(long)
## tibble [50,038 × 3] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:50038] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : chr [1:50038] "1760" "1761" "1762" "1763" ...
## $ morts : num [1:50038] NA NA NA NA NA NA NA NA NA NA ...
# The former column names arrive as character strings; convert them to numbers
# so that `year` can be used as a continuous axis.
long <- long %>%
  mutate(year = as.numeric(year))
str(long)
## tibble [50,038 × 3] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:50038] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : num [1:50038] 1760 1761 1762 1763 1764 ...
## $ morts : num [1:50038] NA NA NA NA NA NA NA NA NA NA ...
# Mean of `morts` per country, ignoring missing values.
avg_country <- long %>%
  group_by(country) %>%
  summarize(am = mean(morts, na.rm = TRUE))

# Mean of `morts` per year, ignoring missing values.
avg_year <- long %>%
  group_by(year) %>%
  summarize(ay = mean(morts, na.rm = TRUE))
Aesthetics are mappings between the variables in the data and visual properties in the plots. Aesthetics are set in the aes() function, and the most common aesthetics are x, y, color (or fill), size, shape, and linetype.
If you set these inside aes(), you map them to a variable in the data. If you want to set them to one fixed value for all observations, pass them as arguments to the geom, outside aes().
The other essential element of a ggplot is a geom layer to determine how the data will be plotted.
# Keep only Sweden's rows and build a base plot of mortality against year.
sweden_long <- filter(long, country == "Sweden")
g <- ggplot(sweden_long, aes(x = year, y = morts))

# The base plot has no geom layer yet, so no data is drawn.
g

# Add one geom layer at a time to the same base plot to compare displays.
g + geom_point()
g + geom_line()
g + geom_line() + geom_smooth()
#### try others
g + geom_area()
g + geom_violin()
If we want to change the data we are using in the plot, we need to make
a new call to ggplot. For example, now let’s look at the mortality rates
over time using line plots for each of the countries: United States,
United Kingdom, Sweden, Afghanistan, Rwanda. To get a line for each
country individually, we need to specify the group aesthetic and map it
to the country variable.
# Keep the five countries of interest.
sub <- long %>%
  filter(
    country %in% c("United States", "United Kingdom", "Sweden", "Afghanistan", "Rwanda")
  )

# `group = country` makes geom_line() draw one line per country.
g <- ggplot(
  data = sub,
  mapping = aes(x = year, y = morts, group = country)
)
g +
  geom_line()
Note that we have a single plot with a trajectory over time of the
mortality rates for each of these five countries, but we cannot tell
which country corresponds to which line. We will see how to fix this by
using color and a legend in the upcoming sections.
# Line plot with axis labels, title and subtitle; stored as `gg` so later
# examples can build on it.
gg <- g +
  geom_line() +
  labs(
    title = "Child Mortality Rates",
    subtitle = "Stratified by Country",
    x = "Year",
    y = "Mortality Rate"
  )
gg
The x and y axis limits can be adjusted using the xlim() and ylim()
functions to change the view of the plotting regions. For example, let’s
zoom in on the years 1900-2000 for the bottom three lines. The mortality
rates in this region appear to range from 0 to 1.5.
# Restrict the x axis to 1900-2000 and the y axis to 0-1.5.
gg +
  xlim(1900, 2000) +
  ylim(0, 1.5)
We may also want to change the position and appearance of the text appearing in the titles or axes. In order to make these changes, we need to use the theme function (see ?theme for all this function can do). theme controls most of the look and feel of the plot. The arguments passed to theme components are required to be set using special element_type() functions. There are four major types: element_text(), element_line(), element_rect(), and element_blank().
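Inside element_text we can set the size, face, family, color, horizontal and vertical justification (hjust, vjust), and lineheight of the text, as in the following example: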
# Restyle the title, subtitle and axis titles of `gg` through theme().
# NOTE(review): the "American Typewriter" family must be available on the
# machine that renders the plot — confirm before relying on it.
gg +
  theme(
    plot.title = element_text(
      family = "American Typewriter",
      face = "bold",
      color = "tomato",
      size = 20,
      hjust = 0.5,
      lineheight = 1.2
    ),
    plot.subtitle = element_text(
      family = "American Typewriter",
      face = "bold",
      size = 15,
      hjust = 0.5
    ),
    axis.title.x = element_text(size = 15, vjust = 0.5), # X axis title
    axis.title.y = element_text(size = 15) # Y axis title
  )
## Plotting Characters, Line Types and Colors
# `shape = 2` is given outside aes(), so every point uses the same symbol.
ggplot(data = sweden_long, mapping = aes(x = year, y = morts)) +
  geom_point(shape = 2)
R also has 7 different line types that can be chosen by the numbers 0 to 6 or by name (e.g. “blank”, “solid”, “dashed”, etc.). We set this by using the linetype aesthetic.
# `linetype = 2` is given outside aes(), so it applies to the whole line.
ggplot(data = sweden_long, mapping = aes(x = year, y = morts)) +
  geom_line(linetype = 2)

# Inside aes(), shape and linetype are mapped to `country` instead of being
# fixed to one value.
gg +
  geom_point(mapping = aes(shape = country))
gg +
  geom_line(mapping = aes(linetype = country))
We can also modify the colors used in the plots by using the color aesthetic or the fill aesthetic depending on the plot type.
# Map point colour to country. The aesthetic is spelled `color`, matching the
# other examples in this file.
gg + geom_point(aes(color = country))
It’s actually pretty hard to make a good color palette. Luckily, smart and artistic people have spent a lot more time thinking about this. The result is the RColorBrewer package
# Install RColorBrewer only when it is missing, so re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library(RColorBrewer)

# Dark2 is one of ColorBrewer's qualitative palettes, so it is requested with
# type = "qual".
gg + geom_point(aes(color = country)) +
  scale_color_brewer(type = "qual", palette = "Dark2")
### Modifying a Legend
# paste() and paste0() build character vectors; with no arguments each returns
# an empty character vector.
paste()
## character(0)
paste0()
## character(0)

# `name` sets the legend title and `labels` replaces the legend entries with
# "Country 1" ... "Country 5". Dark2 is a qualitative palette, hence
# type = "qual".
gg + geom_line(aes(color = country)) +
  scale_color_brewer(type = "qual", palette = "Dark2", name = "Country",
                     labels = paste("Country", 1:5))

# Same plot with the legend moved below the panel.
gg + geom_line(aes(color = country)) +
  scale_color_brewer(type = "qual", palette = "Dark2", name = "Country",
                     labels = paste("Country", 1:5)) +
  theme(legend.position = "bottom")

# Same plot with the legend placed at numeric coordinates.
# NOTE(review): ggplot2 >= 3.5.0 deprecates a numeric `legend.position` in
# favour of legend.position = "inside" plus `legend.position.inside` — confirm
# the installed version before switching.
gg + geom_line(aes(color = country)) +
  scale_color_brewer(type = "qual", palette = "Dark2", name = "Country",
                     labels = paste("Country", 1:5)) +
  theme(legend.position = c(0.85, 0.95))
## Drawing Multiple Plots in a Single Figure
# One panel per country: points plus a line coloured by country.
gg +
  geom_point() +
  geom_line(mapping = aes(color = country)) +
  facet_wrap(facets = ~ country)
We can adjust the layout and number of rows and columns by using nrow and ncol in facet_wrap.
# Same faceted display built from `sub` directly, with all panels in one row.
ggplot(data = sub, mapping = aes(x = year, y = morts)) +
  geom_point() +
  geom_line(mapping = aes(color = country)) +
  facet_wrap(facets = ~ country, nrow = 1)
ggplot2 provides ggsave to save plots in a number of formats, such as
.png or .pdf. By default, this function saves the last plot that you displayed.
# Display the plot, then write it to disk as a 4 x 4 PNG.
# `plot = gg` names the figure to save explicitly instead of relying on
# ggsave()'s default of the last plot.
gg
ggsave("./fitplot.png", plot = gg, width = 4, height = 4)
For these exercises, we will use the Charm City Circulator bus ridership dataset, Charm_City_Circulator_Ridership.csv. After modifying the path to the dataset on your computer, use the following code to read in and transform the dataset to be ready for use in plotting.
#library(lubridate)
#circ <- read.csv("./Charm_City_Circulator_Ridership.csv",check.names = FALSE)
#str(circ)
#circ <- mutate(circ,date = mdy(date))