# Point R at the folder that holds this lecture's files.
### REPLACE THE PATH IN THE setwd() CALL BELOW WITH YOUR OWN FOLDER
# Example paths by operating system:
#   WINDOWS: setwd("c:/Documents/my/working/directory")
#   MAC:     setwd("/Users/yourname/working/directory")
setwd("/home/nicole/Documents/Teaching/Class_Documents/Lecture2")
ggplot2 is a powerful and easy-to-use plotting package available from the tidyverse (a set of R packages designed to work together and be easy to use).
Many ggplot2 resources are available for free online:
ggplot2 uses Tidy Data. What is it?
For the examples here we will use a simple dataset from the World Bank containing GDP per capita and Net Foreign Direct Investment.
# Read the World Bank data (looked up relative to the working directory).
df <- read.csv(file = "WorldBank_GDP.csv")

# Preview the first 25 rows.
head(df, n = 25)
The minimum required for a plot to draw is:
Common types of geoms:
You can also add in geoms that annotate the plot:
See all the options in the ggplot2 reference files.
# Load ggplot2
library(ggplot2)

# GDP per capita by year, drawn as one colored line per country
ggplot(
  data = df,
  mapping = aes(
    x = Year, y = GDP_pc,
    group = Country.Name, color = Country.Name
  )
) +
  geom_line()

# The same mapping drawn as points instead of lines
ggplot(
  data = df,
  mapping = aes(
    x = Year, y = GDP_pc,
    group = Country.Name, color = Country.Name
  )
) +
  geom_point()
Each geom has its own requirements - see the Help tab in the side console to look up what the geom needs. You can also assign a plot to a variable for re-use using <- and layer multiple plots over one another.
Use stat to apply additional transformations such as means, sums, etc.:
Use position to adjust the location of objects:
# Jittered scatterplot for three countries, stored in `p` for later use
p <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(
    x = Year, y = GDP_pc,
    group = Country.Name, color = Country.Name
  )
) +
  geom_point(position = "jitter")
# Grouped barplot: one bar per country, placed side by side within each year
p1 <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(x = Year, y = GDP_pc, fill = Country.Name)
) +
  geom_bar(stat = "identity", position = "dodge")

# Stacked barplot over time: no position is given, so geom_bar()'s default
# position applies
p2 <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(x = Year, y = GDP_pc, fill = Country.Name)
) +
  geom_bar(stat = "identity")

# Percentage barplot over time: position = "fill" is requested
p3 <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(x = Year, y = GDP_pc, fill = Country.Name)
) +
  geom_bar(stat = "identity", position = "fill")
# Mean GDP per capita per year across the three selected countries, drawn as
# a line, with error bars computed by mean_se().
# `fun` is used instead of `fun.y` (deprecated in ggplot2 3.3.0), and
# `linewidth` instead of `size` for the error-bar stroke (ggplot2 3.4.0).
p4 <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(x = Year)
) +
  stat_summary(
    mapping = aes(y = GDP_pc),
    fun = mean,
    geom = "line"
  ) +
  stat_summary(
    mapping = aes(y = GDP_pc),
    fun.data = mean_se,
    geom = "errorbar",
    linewidth = .5,
    alpha = .7,
    color = "Black"
  )
# Printing out the plots: each line below is just the name of a plot object
# assigned earlier in this script (p, then p1 through p4), evaluated in turn.
p
p1
p2
p3
p4
Use facet_wrap() or facet_grid() to create multiple graphs based on group, rather than having each group on the same plot.
# One panel per country: points first, then lines layered on top
ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(
    x = Year, y = GDP_pc,
    group = Country.Name, color = Country.Name
  )
) +
  geom_point() +
  geom_line() +
  facet_wrap(facets = ~ Country.Name)
To create two-way graphs, you can either use data in long format (as above) or create individual geoms for each variable.
You can also create simple regression lines using geom_smooth(). When no method is given and there are fewer than 1,000 observations, geom_smooth() defaults to ‘loess’, which fits a local regression.
# Cross-section scatterplot: 2012 only, one colored point per country
ggplot(
  data = df[df$Year %in% 2012, ],
  mapping = aes(x = GDP_pc, y = net_FDI, color = Country.Name)
) +
  geom_point(stat = "identity", position = "identity", size = 3)

# The same cross-section without color, plus a smoothed trend from
# geom_smooth()
ggplot(
  data = df[df$Year %in% 2012, ],
  mapping = aes(x = GDP_pc, y = net_FDI)
) +
  geom_point(stat = "identity", position = "identity", size = 3) +
  geom_smooth()
# Console message recorded when this plot was drawn:
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
To add in customized labels:
You can also change the color of any geom or stat by adding in the argument ‘color=YourColor’
You can also change the color of your legend labels depending on your AES settings using
# Add labels to and change line colors of the mean-GDP plot.
# Note: this reassigns `p`, replacing any plot previously stored under it.
# `fun` is used instead of `fun.y` (deprecated in ggplot2 3.3.0), and
# `linewidth` instead of `size` for the error-bar stroke (ggplot2 3.4.0).
p <- ggplot(
  data = df[df$Country.Name %in% c("Mexico", "Argentina", "Brazil"), ],
  mapping = aes(x = Year)
) +
  stat_summary(
    mapping = aes(y = GDP_pc),
    fun = mean,
    geom = "line",
    color = "Navy"
  ) +
  stat_summary(
    mapping = aes(y = GDP_pc),
    fun.data = mean_se,
    geom = "errorbar",
    linewidth = .5,
    alpha = .7,
    color = "grey25"
  ) +
  labs(
    title = "Mean GDP Per Capita: Mexico, Argentina, Brazil",
    y = "Mean GDP Per Capita(US$)"
  )

# Draw the labelled plot
p
theme() can change text size, edit legends, set colors, etc.
Styles:
# ggthemes supplies extra themes and scales on top of ggplot2's built-ins
library(ggthemes)

# Built-in ggplot2 themes applied to the stored plot `p`
p + ggplot2::theme_classic()
p + ggplot2::theme_minimal()

# ggthemes can even make it look like STATA output
p + ggthemes::theme_stata() + ggthemes::scale_color_stata()
Use the ggsave() function to output your plots
# Write the plot stored in `p` to "my_plot.png" using the png device
ggsave(
  plot = p, filename = "my_plot.png", device = "png",
  width = 10, height = 8, dpi = 100
)