# Set the default fig.width / fig.height chunk options for the document.
knitr::opts_chunk$set(fig.width = 4, fig.height = 2)

Part 1: test package install

“data” comes with R

test commands (copy and paste from Etherpad)

library(ggplot2) # attach ggplot2

# Install check: plot hwy against displ from `mpg`, coloured by drv, with a
# loess smoother whose line type also varies by drv.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_smooth(aes(linetype = drv), method = "loess") +
  geom_point() +
  theme_classic()

Part 2: plotting in R: base graphics

There are some common plotting systems in R

- base plotting system
- lattice package
- ggplot2 package
- and there are others as well (plotly, heatmap.2, igraph)

library(gapminder)

# Size and structure of the gapminder data set.
dim(gapminder)
## [1] 1704    6
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1704 obs. of  6 variables:
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...

# View() opens a data viewer window, so only call it from an interactive
# session; this keeps the script usable when it is knitted or run in batch.
if (interactive()) {
  View(gapminder)
}

# plot() on the whole data frame.
plot(gapminder)

# Base graphics: lifeExp against year.
plot(x = gapminder$year, y = gapminder$lifeExp)

# One box of lifeExp values per year, using the formula interface.
boxplot(lifeExp ~ year, data = gapminder)

# Histogram of lifeExp over all rows.
hist(gapminder$lifeExp)

Part 3: plotting in R: ggplot2

ggplot uses data + aesthetics

take a base plot and add feature/components

library(ggplot2)

# Data only: no aesthetics and no layers yet.
ggplot(gapminder)

# Aesthetics passed to ggplot() are the global options/values.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp))

# Add a point layer. Options given inside a geom apply to that layer only.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Same mapping, moved from the global call into the local geom layer.
ggplot(gapminder) +
  geom_point(aes(x = gdpPercap, y = lifeExp))

# The argument names do not have to be spelled out; this works too.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()

Challenge 1

Modify the example so that the figure shows how life expectancy has changed over time

# List the available columns, then put year on the x axis.
names(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_point()

Challenge 2

x and y are not the only aesthetics (“aes”) that we can supply.

color, fill, size, linetype, shape, alpha, etc.

Modify the code from the previous challenge to color the points by the “continent” column. What trends do you see in the data? Are they what you expected?

# Same plot as before, with each point coloured by its continent.
ggplot(gapminder, aes(year, lifeExp, color = continent)) +
  geom_point()

too hard to look at. change it to a line plot

# One line per country, coloured by continent. `group` is the ggplot2
# aesthetic for this; the earlier `by = country` is not a ggplot2 aesthetic
# and only split the lines as a side effect of adding a discrete variable.
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line() # was geom_point()

# "group" draws a line for every country

# ggplot layers

# Layers can keep being added.
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line() + # this layer gets drawn first
  geom_point() # this one is drawn on top

ggplot(gapminder, aes(x = year, y = lifeExp, group = country)) +
  geom_line(aes(color = continent)) + # continent colour moved from global to local
  geom_point() # no colour aesthetic here, so the points use the default (black)

These black dots seem sort of pointless to me. Here is an example where separate layer aesthetics might actually be needed:

ggplot(gapminder, aes(gdpPercap, lifeExp, color = continent)) +
  geom_point()

# Left commented out: histograms don't want y-variables, and here the
# histogram layer would receive the global y aesthetic.
# ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
#   geom_point() +
#   geom_histogram()

# Give each layer its own aesthetics so the histogram only receives x.
ggplot(gapminder) +
  geom_point(aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(aes(gdpPercap))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# color and fill are set manually here; note that they are not inside aes().
ggplot(gapminder) +
  geom_point(aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(
    aes(gdpPercap),
    bins = 400, color = NA, fill = "gray66"
  ) +
  theme_classic()

Challenge 3

sort of ugly to have the histogram blocking the dots

Modify the code so that you can see the dots.

# Option 1: add the histogram layer first so the points are added after it.
ggplot(gapminder) +
  geom_histogram(
    aes(gdpPercap),
    bins = 400, color = NA, fill = "gray66"
  ) +
  geom_point(aes(gdpPercap, lifeExp, color = continent)) +
  theme_classic()

# Option 2: keep the original layer order and set alpha on the histogram.
ggplot(gapminder) +
  geom_point(aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(
    aes(gdpPercap),
    bins = 400, color = NA, fill = "gray66", alpha = 0.5
  ) +
  theme_classic()

Transformations and Statistics

ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point() +
  theme_classic()

# Large-GDP countries make it hard to see the rest of the points, but there
# seems to be some relationship between gdp and lifeExp.

# Log10 x axis and semi-transparent points.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()

# Add a fitted line; lm = linear model.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm")

# Same fit, drawn as a thicker black line without the confidence band.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()

# Fit the same model that geom_smooth(method = "lm") draws below. The
# smoother runs after scale_x_log10(), so the predictor is log10(gdpPercap),
# not raw gdpPercap. The summary is computed once and reused.
fit_summary <- summary(lm(lifeExp ~ log10(gdpPercap), data = gapminder))

# Adjusted R-squared, rounded for display.
r2 <- round(fit_summary$adj.r.squared, digits = 2)

# Two-sided p-value of the slope, formatted like formatC(format = "e",
# digits = 2). It is computed on the log scale so that an extremely small
# p-value is still shown in scientific notation instead of underflowing to 0.
slope_t <- fit_summary$coefficients[2, "t value"]
log10_p <- (log(2) +
  pt(abs(slope_t), df = fit_summary$df[2], lower.tail = FALSE, log.p = TRUE)) /
  log(10)
p_exponent <- floor(log10_p)
p <- sprintf("%.2fe%+03d", 10^(log10_p - p_exponent), p_exponent)

# Scatter plot with the fitted line and the statistics as a text annotation.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  annotate("text", 1000, 84, label = paste0("P=", p, ", ", "R2=", r2)) +
  theme_classic()

Challenge 4

Take a few minutes and play around with colors/shapes.

I frequently use color schemes from http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3

# A fixed hex colour for every point. Hex colors make it easy to keep
# illustrator/R consistent.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5, color = "#756bb1") +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()

# Colour mapped to continent globally; the smoother's colour is set to black.
ggplot(gapminder, aes(gdpPercap, lifeExp, color = continent)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()

# What happened!!?? Here the data are also grouped by continent and the
# smoother's colour is no longer overridden.
ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, color = continent, group = continent)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  theme_classic()

Multi-panel plots and faceting

# Use cowplot (Claus O. Wilke plot)  https://serialmentor.com/dataviz/
# Install only when the package is missing, so re-running or knitting this
# script does not reinstall cowplot every time.
if (!requireNamespace("cowplot", quietly = TRUE)) {
  install.packages("cowplot")
}
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
# Left panel: pooled scatter plot with one fitted line and the P / R2 text.
# `p` and `r2` are the values computed earlier in this script.
p1 <- ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  annotate("text", 1000, 84, label = paste0("P=", p, ", ", "R2=", r2)) +
  theme_classic()

# Right panel: coloured and grouped by continent (what happened!!??).
p2 <- ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, color = continent, group = continent)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  theme_classic()

# Put both plots next to each other in a single row.
plot_grid(p1, p2, nrow = 1)

# Use facet_wrap (part of ggplot2).
# The ~ tells facet_wrap how to break up the data; scales = "free" allows the
# axes to be different in each panel.
ggplot(gapminder, aes(gdpPercap, lifeExp, group = country)) +
  geom_line() +
  facet_wrap(~continent, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))

# Same plot with axis labels and a title. It is built once, stored as
# `bestplot`, then printed, instead of repeating the whole definition.
bestplot <- ggplot(gapminder, aes(gdpPercap, lifeExp, group = country)) +
  geom_line() +
  facet_wrap(~continent, scales = "free") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "GDP (per capita)",
    y = "Life expectancy (years)", # label typo fixed (was "expectance")
    title = "for Science"
  )
bestplot

# Write the stored plot to a PNG file, 12 x 10 cm at 300 dpi.
ggsave(
  filename = "~/Desktop/awesome.png",
  plot = bestplot,
  width = 12,
  height = 10,
  dpi = 300,
  units = "cm"
)

Challenge 5

Combine the faceting methods to make a supercool plot

# Place p1 and the faceted bestplot side by side in one row.
plot_grid(p1, bestplot, nrow = 1)