This file provides supporting code for an introduction to graphing with ggplot2.
# Bar chart of counts: each bar's height is the number of rows that share
# that value of cgv_dem.
ggplot(inequality, aes(x = cgv_dem)) +
  geom_bar()
## Warning: Removed 2 rows containing non-finite values (stat_count).
# Bar chart of proportions: each bar is that value's share of all rows
# (a proportion between 0 and 1, rather than a count).
# group = 1 makes the shares be computed over the whole data set; without
# it, a discrete (factor) x puts every bar in its own group, so each bar
# would have height 1.
# NOTE: ggplot2 >= 3.3.0 also accepts after_stat(prop) in place of ..prop..,
# and newer releases flag the dot-dot form as deprecated.
ggplot(data = inequality) +
  geom_bar(mapping = aes(x = cgv_dem, y = ..prop.., group = 1))
## Warning: Removed 2 rows containing non-finite values (stat_count).
# Histogram of wb_gdppc: `bins` sets how many intervals (bars) the
# continuous variable is divided into.
ggplot(inequality, aes(x = wb_gdppc)) +
  geom_histogram(bins = 12)
## Warning: Removed 8 rows containing non-finite values (stat_bin).
# Bars whose heights come straight from the data (no counting): the colbrit
# values are stacked for each value of cgv_dem, which shows the number of
# dictatorships/democracies that were British colonies
# (assuming colbrit is a 0/1 indicator -- TODO confirm).
ggplot(inequality, aes(x = cgv_dem, y = colbrit)) +
  geom_col()
## Warning: Removed 2 rows containing missing values (position_stack).
# Scatterplot with one point per row: wb_gdppc on x, wb_gini on y.
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini)) +
  geom_point()
## Warning: Removed 152 rows containing missing values (geom_point).
# Line plot: the observations are joined in order of their x value.
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini)) +
  geom_line()
## Warning: Removed 23 rows containing missing values (geom_path).
# Smoothed trend line with its uncertainty band, using the default
# smoother (the recorded output below reports method = 'loess').
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 152 rows containing non-finite values (stat_smooth).
## Formatting
# Bars of colbrit by cgv_dem, with the fill colour also mapped to cgv_dem.
ggplot(inequality, aes(x = cgv_dem, y = colbrit, fill = cgv_dem)) +
  geom_col()
## Warning: Removed 2 rows containing missing values (position_stack).
# Same bar layout, but the fill colour is mapped to a different variable
# (wb_gdppc) than the one on the x axis.
ggplot(data = inequality) +
  geom_col(mapping = aes(x = cgv_dem, y = colbrit, fill = wb_gdppc))
## Warning: Removed 2 rows containing missing values (position_stack).
# Scatterplot with the point colour mapped to cgv_dem.
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini, colour = cgv_dem)) +
  geom_point()
## Warning: Removed 152 rows containing missing values (geom_point).
For a full list of Color Brewer palettes, go to: http://www.sthda.com/sthda/RDoc/figure/text-mining/word-cloud-generator-rcolorbrewer-palettes.png
# Store cgv_dem as a factor so that it gets a discrete colour scale, then
# colour the points with the "Spectral" Color Brewer palette.
inequality[["cgv_dem"]] <- as.factor(inequality[["cgv_dem"]])
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini, colour = cgv_dem)) +
  geom_point() +
  scale_colour_brewer(palette = "Spectral")
## Warning: Removed 152 rows containing missing values (geom_point).
Note that you set a color for each value of the variable mapped to “color”:
# Scatterplot with hand-picked colours for each level of cgv_dem.
# `values` is matched against the factor's levels ("0" and "1"). Missing
# values are not a level, so a "NA" entry in `values` is never used; the
# colour for missing data is set with `na.value` instead.
ggplot(data = inequality) +
  geom_point(mapping = aes(x = wb_gdppc, y = wb_gini, color = cgv_dem)) +
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    na.value = "yellow"
  )
## Warning: Removed 152 rows containing missing values (geom_point).
Note that color is OUTSIDE the aes function, unlike above.
# Fixed appearance settings (not tied to a variable) are passed outside
# aes(): a red line of size 2 with a blue uncertainty band.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line widths.
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini)) +
  geom_smooth(colour = "red", fill = "blue", size = 2)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 152 rows containing non-finite values (stat_smooth).
# Smoothed trend of wb_gdppc against wb_gini, with a title, subtitle,
# caption and axis labels supplied through labs().
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc)) +
  geom_smooth() +
  labs(
    x = "Gini Coefficient",
    y = "GDP per capita (US$)",
    title = "The effect of Inequality on GDP",
    subtitle = "in 2000",
    caption = "Source: World Bank"
  )
## `geom_smooth()` using method = 'loess'
## Warning: Removed 152 rows containing non-finite values (stat_smooth).
Overall range
# Restrict the visible y range to 0-10000. coord_cartesian() zooms the
# view; it does not drop the observations outside the range.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc)) +
  geom_point() +
  coord_cartesian(ylim = c(0, 10000))
## Warning: Removed 152 rows containing missing values (geom_point).
Breaks within the range
# Same zoomed view, with y-axis tick marks every 2000 units.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc)) +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000))
## Warning: Removed 152 rows containing missing values (geom_point).
To label all points in the scatterplot:
# Draw each observation as its `country` text instead of a point.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc, label = country)) +
  geom_text() +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000))
## Warning: Removed 152 rows containing missing values (geom_text).
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.3.3
# Points plus boxed country labels; geom_label_repel() (from ggrepel)
# moves the labels apart so that they do not overlap.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc)) +
  geom_point() +
  geom_label_repel(mapping = aes(label = country)) +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000))
## Warning: Removed 152 rows containing missing values (geom_point).
## Warning: Removed 152 rows containing missing values (geom_label_repel).
To label outliers in the plot, you should subset the data:
# Plot every observation as a point, but give the label layer its own data:
# only the rows with wb_gdppc above 15000 receive a country label.
ggplot(data = inequality, mapping = aes(x = wb_gini, y = wb_gdppc)) +
  geom_point() +
  geom_text_repel(
    mapping = aes(label = country),
    data = subset(inequality, wb_gdppc > 15000)
  )
## Warning: Removed 152 rows containing missing values (geom_point).
## Warning: Removed 37 rows containing missing values (geom_text_repel).
Legend name:
# The legend title is set in labs() under the name of the aesthetic that
# produces the legend (here `colour`).
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc, colour = cgv_dem)) +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    x = "Gini Coefficient",
    y = "GDP per capita (US$)",
    colour = "Democracy",
    title = "The effect of inequality on GDP",
    subtitle = "in 2000",
    caption = "Source: World Bank, CGV"
  )
## Warning: Removed 152 rows containing missing values (geom_point).
Legend position:
# Same labelled plot, with the legend moved below the panel through
# theme(legend.position = ...).
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc, colour = cgv_dem)) +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    x = "Gini Coefficient",
    y = "GDP per capita (US$)",
    colour = "Democracy",
    title = "The effect of inequality on GDP",
    subtitle = "in 2000",
    caption = "Source: World Bank, CGV"
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 152 rows containing missing values (geom_point).
To change the labels shown in the legend, relabel the values of the underlying variable:
# Relabel cgv_dem for display: "0" -> "No", "1" -> "Yes", missing -> "N/A".
# addNA() turns missing values into a real factor level so that they can be
# given a label; a plain levels() assignment leaves them as NA.
# Levels are renamed by value rather than by position, so the result does
# not depend on level order and the block can safely be run more than once.
inequality$cgv_dem <- local({
  dem <- addNA(as.factor(inequality$cgv_dem))
  lv <- levels(dem)
  lv[is.na(lv)] <- "N/A"
  lv[lv == "0"] <- "No"
  lv[lv == "1"] <- "Yes"
  levels(dem) <- lv  # duplicated names (on a re-run) are merged into one level
  dem
})
# Labelled scatterplot coloured by cgv_dem; the legend entries are the
# levels of that variable, and the legend sits below the panel.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc, colour = cgv_dem)) +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 10000, by = 2000)) +
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    x = "Gini Coefficient",
    y = "GDP per capita (US$)",
    colour = "Democracy",
    title = "The effect of inequality on GDP",
    subtitle = "in 2000",
    caption = "Source: World Bank, CGV"
  ) +
  theme(legend.position = "bottom")
## Warning: Removed 152 rows containing missing values (geom_point).
To include both points and line, add them together:
# Layers are combined with `+`. The shared mapping is declared once in
# ggplot() and inherited by both the point layer and the smoother.
ggplot(inequality, aes(x = wb_gdppc, y = wb_gini)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 152 rows containing non-finite values (stat_smooth).
## Warning: Removed 152 rows containing missing values (geom_point).
To add a reference line:
# Scatterplot with a horizontal reference line at y = 0.
# The intercept is a constant, so it is passed as a parameter instead of
# inside aes(): a constant in aes() is evaluated for every row of the
# inherited data and draws one overlapping line per row.
ggplot(data = inequality) +
  geom_point(mapping = aes(x = wb_gdppc, y = wb_gini)) +
  geom_hline(yintercept = 0)
## Warning: Removed 152 rows containing missing values (geom_point).
# Scatterplot with a vertical reference line at x = 0.
# The intercept is a constant, so it is passed as a parameter instead of
# inside aes(): a constant in aes() is evaluated for every row of the
# inherited data and draws one overlapping line per row.
ggplot(data = inequality) +
  geom_point(mapping = aes(x = wb_gdppc, y = wb_gini)) +
  geom_vline(xintercept = 0)
## Warning: Removed 152 rows containing missing values (geom_point).
# Points plus a straight-line (linear model) fit of wb_gdppc on wb_gini,
# with custom y-axis breaks, labels, and a view limited to y in 0-12000.
ggplot(inequality, aes(x = wb_gini, y = wb_gdppc)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  scale_y_continuous(breaks = seq(0, 12000, by = 2000)) +
  coord_cartesian(ylim = c(0, 12000)) +
  labs(
    x = "Gini Coefficient",
    y = "GDP per capita (US$)",
    title = "The effect of Inequality on GDP per capita",
    subtitle = "in 2000",
    caption = "Source: World Bank"
  )
## Warning: Removed 152 rows containing non-finite values (stat_smooth).
## Warning: Removed 152 rows containing missing values (geom_point).