# 1. Intro to the layers of ggplot (data, aes, geom)
# 2. Changing the aesthetics (shape, color, size, fill, alpha)
# 3. Scatterplots
# 4. Histograms
# 5. Boxplots
# 6. Bar Graphs
# 7. Line Graphs
# BONUS: Animated Graphs
# NOTE(review): the plots below assume ggplot2 is already attached (e.g. via
# library(ggplot2)); no such call is visible in this part of the script.

# Treat the number of cylinders as a categorical variable (factor).
mtcars$cyl <- as.factor(mtcars$cyl)

# Data, aesthetics (x, y), point
ggplot(mtcars, aes(cyl, mpg)) +
  geom_point()

# Map displacement to both the colour and the size aesthetic
ggplot(mtcars, aes(wt, mpg, color = disp, size = disp)) +
  geom_point()

# As a general rule, if you want to set an aesthetic to a specific value, you
# would specify that outside of aes(). For example, if you specify
# (color = "blue"), you would not place it in aes(). However, if you want to
# specify how the aesthetics should be used, you would place it inside aes().
# For example, if you want gender to be represented as separate colors, you
# would use (aes(color = gender)).
# Simple scatter with smooth with CI
# Base plot shared by the ungrouped scatterplots below.
scatter <- ggplot(examData, aes(Anxiety, Exam))
scatter +
  geom_point() +
  geom_smooth() +
  labs(x = "Exam Anxiety", y = "Exam Performance %")
# Console message from the call above:
# `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Simple scatter with regression line
scatter +
  geom_point() +
  geom_smooth(method = "lm", colour = "Red", se = FALSE) +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Simple scatter with regression line + CI
scatter +
  geom_point() +
  geom_smooth(method = "lm", colour = "Red") +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Simple scatter with regression line + coloured CI
scatter +
  geom_point() +
  geom_smooth(method = "lm", colour = "Red", alpha = 0.1, fill = "Red") +
  labs(x = "Exam Anxiety", y = "Exam Performance %")

# Grouped scatter with regression line + CI
# Rebuild the base plot with Gender mapped to colour so every layer is grouped.
scatter <- ggplot(examData, aes(Anxiety, Exam, colour = Gender))

# Keep the grouped plot in scatter2 so later examples can restyle it.
scatter2 <- scatter +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) +
  labs(x = "Exam Anxiety", y = "Exam Performance %", colour = "Gender")
scatter2
# Scatter plot
# Set the two group colours manually.
scatter2 + scale_color_manual(values = c("blue", "green"))

# Change line color
# http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
scatter2 + scale_color_manual(values = c("deepskyblue1", "darkseagreen1"))

# Change plot color
scatter2 +
  theme(
    plot.background = element_rect(fill = "black", color = "black"),
    panel.background = element_rect(fill = "black"),
    axis.title = element_text(color = "white")
  )

# Base plot for the histograms of day-1 hygiene scores.
festivalHistogram <- ggplot(festivalData, aes(day1))
# Histogram of day-1 hygiene scores with a bin width of 0.4.
festivalHistogram +
  geom_histogram(binwidth = 0.4) +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Frequency")

# install.packages("extrafont")
library(extrafont)

# Change color and font
# One panel per gender, narrower bins, custom bar colours and text font.
festivalHistogram +
  geom_histogram(binwidth = 0.2, color = "black", fill = "lightskyblue2") +
  theme(text = element_text(size = 12, family = "Comic Sans MS")) +
  facet_wrap("gender")

# Base plot for the density estimates of day-1 hygiene scores.
festivalDensity <- ggplot(festivalData, aes(day1))
# Density estimate of day-1 hygiene scores.
festivalDensity +
  geom_density() +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Density Estimate")

# Change opacity and labels
# One semi-transparent density per gender so overlapping areas stay visible.
festivalDensity +
  geom_density(aes(fill = gender), alpha = 0.4) +
  labs(x = "Hygiene (Day 1 of Festival)", y = "Density Estimate")

# Base plot for the boxplots: day-1 hygiene score by gender.
festivalBoxplot2 <- ggplot(festivalData, aes(gender, day1))
# Plain boxplot of day-1 hygiene score by gender.
festivalBoxplot2 +
  geom_boxplot() +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")

# Boxes filled by gender, with the raw observations drawn on top.
festivalBoxplot2 +
  geom_boxplot(aes(fill = gender)) +
  geom_point() +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")

# Same plot, but the raw observations are jittered and semi-transparent.
festivalBoxplot2 +
  geom_boxplot(aes(fill = gender)) +
  geom_jitter(alpha = 0.2) +
  labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")

library(haven)
# load dictator game data
df_dg <- read_sav("sample_dictatorgame.sav")
# inspect data
head(df_dg)

# change condition and role to factors with labels
# The codes are converted with factor() instead of overwriting numeric cells
# with strings, so the columns never pass through a mixed number/text state.
# as.numeric() strips any SPSS value-label class first.
# NOTE(review): codes other than -1 / 1 become NA here - confirm the data
# contain only these two codes.
df_dg$condition <- factor(
  as.numeric(df_dg$condition),
  levels = c(-1, 1),
  labels = c("Control", "Experimental")
)
# Level order (Actor, Partner) matches the alphabetical order ggplot2 would
# use for plain character data, so fill colours keep their assignment.
df_dg$role <- factor(
  as.numeric(df_dg$role),
  levels = c(1, -1),
  labels = c("Actor", "Partner")
)

# Base plot for the bar graphs: amount shared by condition.
bar <- ggplot(df_dg, aes(condition, share))
# Grouped bar chart built on `bar`: bars show group means, error bars show
# confidence intervals; x and y are inherited from `bar`, fill separates roles.
# NOTE(review): mean_cl_normal typically needs the Hmisc package installed -
# confirm it is available.
bar2 <- bar +
  stat_summary(
    aes(fill = role),
    fun = mean, # display the means
    geom = "bar",
    position = "dodge"
  ) +
  stat_summary(
    aes(fill = role),
    fun.data = mean_cl_normal, # 95% CI assuming normality (other option would be _boot)
    geom = "errorbar",
    position = position_dodge(width = 0.90),
    width = 0.2
  ) +
  labs(x = "Condition", y = "Money Shared", fill = "Role") +
  scale_fill_manual(values = c("deepskyblue1", "slategray3"))
bar2

library(ggsignif)
# Add significance brackets to the grouped bar chart stored in bar2.
bar2 +
  scale_y_continuous(breaks = seq(0, 15, 1)) + # specifies breaks (0-15 at every 1pt)
  coord_cartesian(ylim = c(0, 15)) + # y-axis on 0-15 scale
  geom_signif( # sig bars between roles
    y_position = c(7.6, 8.5), xmin = c(0.8, 1.8), xmax = c(1.2, 2.2),
    annotations = c("NS", "**"), tip_length = 0, color = "#756F6F"
  ) +
  geom_signif( # sig bars between condition
    comparisons = list(c("Control", "Experimental")), map_signif_level = TRUE,
    annotations = "NS", y_position = 11, color = "#756F6F"
  ) +
  theme_classic()

# Raw data for the line-graph examples.
depression_data <- read.csv("depression_example_data.csv")
# Simulate meaningful fake data
## Depression over time with/without treatment
# NOTE(review): no seed is set, so the simulated scores change on every run;
# call set.seed() first if reproducible figures are needed.
# NOTE(review): %>%, arrange() and desc() come from dplyr, which is only
# attached further down in the visible code - confirm it is loaded earlier.

# Size every simulated vector from the data instead of a hard-coded 20, so
# nothing is silently recycled when the file has a different number of rows.
n_obs <- nrow(depression_data)

# Baseline: every participant is drawn from the same N(20, 1) distribution.
dep_data <- depression_data
dep_data$Baseline <- rnorm(n_obs, mean = 20, sd = 1)

# Six weeks: a random half-and-half mixture of "still high" scores, N(20, 2),
# and "improved" scores, N(12, 2).
still_high <- sample(c(0, 1), size = n_obs, replace = TRUE) == 1
six_weeks <- ifelse(
  still_high,
  yes = rnorm(n_obs, mean = 20, sd = 2),
  no = rnorm(n_obs, mean = 12, sd = 2)
)

# Put the intervention rows first and pair them with the scores sorted in
# ascending order, so the intervention group receives the lowest scores.
dep_data <- dep_data %>% arrange(desc(intervention))
dep_data$Six_Weeks <- sort(six_weeks)

# Label the groups; the level order matches the alphabetical order ggplot2
# would use for plain character data.
dep_data$intervention <- factor(
  dep_data$intervention,
  levels = c(1, 0),
  labels = c("Intervention", "No Intervention")
)
dep_data$ID <- seq_len(nrow(dep_data))

# This is a longitudinal dataset. To plot it as a line graph, we'll need to
# convert it to a long dataset. Use pivot_longer() to reshape the data and call
# the new dataframe "dep_data1".
# NOTE(review): one possible solution, using the column names the line-graph
# code expects (Time, Depression_Level):
# dep_data1 <- tidyr::pivot_longer(dep_data, c(Baseline, Six_Weeks),
#                                  names_to = "Time",
#                                  values_to = "Depression_Level")
# Base plot for the line graphs, one colour per intervention group.
# NOTE(review): dep_data1 (long format with Time and Depression_Level columns)
# is not created in the visible code; it must exist before this runs.
# NOTE(review): mean_cl_boot typically needs the Hmisc package installed -
# confirm it is available.
line <- ggplot(dep_data1, aes(Time, Depression_Level, color = intervention))

# Mean depression level at each time point, one line per group.
line + stat_summary(fun = mean, geom = "line", aes(group = intervention))

# Add error bars and labels. coord_cartesian() zooms the y-axis without
# discarding observations; ylim() would drop out-of-range rows before the
# means and intervals are computed.
line +
  stat_summary(fun = mean, geom = "line", aes(group = intervention)) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  labs(x = "Time", y = "Depression", colour = "Intervention") +
  coord_cartesian(ylim = c(5, 30))

# Distinguish the groups by point shape and line type as well as colour.
line +
  stat_summary(fun = mean, geom = "point", aes(shape = intervention), size = 4) + # Shape of point by group
  stat_summary(fun = mean, geom = "line", aes(group = intervention, linetype = intervention)) + # Dashed or solid line by group
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) + # 95% CI
  labs(x = "Time", y = "Mean Depression Score", colour = "Group", shape = "Group", linetype = "Group") + # Labels
  coord_cartesian(ylim = c(5, 30)) # Range of y-axis

library(RColorBrewer)
library(gapminder)
library(dplyr)

# Restrict to the 2007 observations once, then take both extremes of
# life expectancy.
gapminder_2007 <- filter(gapminder, year == 2007)
gm2007.1 <- slice_max(gapminder_2007, lifeExp, n = 10) # ten highest
gm2007.2 <- slice_min(gapminder_2007, lifeExp, n = 10) # ten lowest
gm2007 <- rbind(gm2007.1, gm2007.2)

# Add a geom_segment() layer
# Each country gets a point at its life expectancy plus a horizontal segment
# running from that point to x = 30.
ggplot(gm2007, aes(x = lifeExp, y = country, color = lifeExp)) +
  geom_point(size = 4) +
  geom_segment(aes(xend = 30, yend = country), size = 2)

# Set the color scale
# Keep only the first and last colours of the 5-class RdYlBu palette.
palette <- brewer.pal(5, "RdYlBu")[-(2:4)]

# Mean life expectancy over ALL countries in 2007. gm2007 holds only the ten
# highest and ten lowest countries, so its mean is not the global average that
# the annotation below claims to show.
global_mean <- mean(gapminder$lifeExp[gapminder$year == 2007])

# Start and end coordinates of the curved arrow pointing at the mean line.
x_start <- global_mean + 3
y_start <- 13
x_end <- global_mean
y_end <- 13.5

# Add a title and caption
plt_country_vs_lifeExp <-
  ggplot(gm2007, aes(x = lifeExp, y = country, color = lifeExp)) +
  geom_point(size = 4) +
  geom_segment(aes(xend = 30, yend = country), size = 2) +
  geom_text(aes(label = round(lifeExp, 1)), color = "white", size = 1.5) +
  scale_x_continuous("", expand = c(0, 0), limits = c(30, 90), position = "top") +
  scale_color_gradientn(colors = palette) +
  labs(title = "Highest and lowest life expectancies, 2007", caption = "Source: gapminder")

# Strip the y-axis decoration and legend, then mark the global average with a
# dotted vertical line, a text label and a curved arrow.
plt_country_vs_lifeExp +
  theme_classic() +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text = element_text(color = "black"),
    axis.title = element_blank(),
    legend.position = "none"
  ) +
  geom_vline(xintercept = global_mean, color = "grey40", linetype = 3) +
  annotate(
    "text",
    x = x_start, y = y_start,
    label = "The\nglobal\naverage",
    vjust = 1.1, size = 3, family = "Times", color = "grey40"
  ) +
  annotate(
    "curve",
    x = x_start, y = y_start, xend = x_end, yend = y_end,
    arrow = arrow(length = unit(0.1, "cm"), type = "closed"),
    color = "grey40"
  )

### Install animation packages
# devtools::install_github('thomasp85/gganimate')
library(gganimate)
# install.packages("gifski")
library(gifski)
library(gapminder)

# Working copy of the gapminder data used for the animated plot.
gg <- gapminder

# Bubble chart of life expectancy against GDP per capita (log10 x-axis),
# one panel per continent, bubble size by population, colour by country.
ggplot(
  data = gg,
  mapping = aes(gdpPercap, lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  # Here comes the gganimate specific bits
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  ease_aes("linear")