- principles of data vis
- grammar of graphics
- aesthetics and attributes
- geometries
- major tools
- cosmetics I
- resources
- cosmetics II (homework)
# Import the Blomkvist et al. (2017) data set and list its columns and types.
blomkvist <- read_csv(file = "data/blomkvist.csv")
glimpse(blomkvist)
Rows: 267
Columns: 10
$ id         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
$ sex        <chr> "male", "female", "female", "female", "male", "male", "fema…
$ age        <dbl> 84, 37, 62, 85, 73, 65, 30, 49, 83, 58, 25, 88, 62, 88, 27,…
$ medicine   <dbl> 8, 1, 0, 4, 5, 0, 0, 0, 11, 0, 0, 4, 3, 8, 1, 3, 4, 1, 1, 0…
$ meds_cat   <chr> "a lot", "little", "none", "few", "a lot", "none", "none", …
$ smoker     <chr> "former", "no", "yes", "former", "former", "no", "no", "for…
$ rt_hand_d  <dbl> 701.6667, 470.6667, 638.6667, 708.0000, 607.3333, 541.6667,…
$ rt_hand_nd <dbl> 780.3333, 497.0000, 638.0000, 638.6667, 652.0000, 498.6667,…
$ rt_foot_d  <dbl> 1009.0000, 737.6667, 878.0000, 902.3333, 923.0000, 686.6667…
$ rt_foot_nd <dbl> 962.6667, 692.3333, 786.0000, 1373.6667, 805.0000, 599.6667…
# Step 1: set up the plot with the data and the aesthetic mappings only;
# no geom layer has been added yet.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
)

# Step 2: add a point layer to turn the mappings into a scatterplot.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_point()
# Scatterplot with the y-axis on a log10 scale.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_point() +
  scale_y_log10()

# Add a linear-model fit on top of the points.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_point() +
  scale_y_log10() +
  stat_smooth(method = "lm")

# Same fit, but with an additional quadratic term for x (age).
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_point() +
  scale_y_log10() +
  stat_smooth(
    method = "lm",
    formula = y ~ x + I(x^2)
  )
# Map smoker status to colour in ggplot(): both the points and the quadratic
# fit inherit the mapping, so each smoker group gets its own colour and curve.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d, colour = smoker)
) +
  geom_point() +
  scale_y_log10() +
  stat_smooth(
    method = "lm",
    formula = y ~ x + I(x^2)
  )

# Polished version of the same plot: semi-transparent points, comma-formatted
# axis labels, fits without confidence bands extended over the full x-range,
# a clean theme with a colour-blind-friendly palette, descriptive labels, and
# the legend moved to the top right.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d, colour = smoker)
) +
  geom_point(alpha = .25) +
  scale_y_log10(labels = scales::comma) +
  stat_smooth(
    method = "lm",
    formula = y ~ x + I(x^2),
    se = FALSE,
    fullrange = TRUE
  ) +
  ggthemes::theme_clean() +
  ggthemes::scale_color_colorblind() +
  labs(
    y = "Average reaction time of dominant\nhand (in msecs)",
    x = "Age (in years)",
    caption = "Data published in\nBlomkvist et al. (2017)",
    colour = "Smoker"
  ) +
  theme(
    legend.position = "top",
    legend.justification = "right",
    axis.title = element_text(hjust = 0)
  )
Open script exercises/1_scatterplots.R
| Data set | Mean (x) | SD (x) | Mean (y) | SD (y) | Correlation | Intercept | Slope |
|---|---|---|---|---|---|---|---|
| 1 | 9 | 3.32 | 7.5 | 2.03 | 0.82 | 3 | 0.5 |
| 2 | 9 | 3.32 | 7.5 | 2.03 | 0.82 | 3 | 0.5 |
| 3 | 9 | 3.32 | 7.5 | 2.03 | 0.82 | 3 | 0.5 |
| 4 | 9 | 3.32 | 7.5 | 2.03 | 0.82 | 3 | 0.5 |
Open script exercises/tdd.R
Hartwig and Dearing (1979):
Tufte (1983):
- ggplot2 refers to grammar of graphics (Wickham 2016, 2010)
- (base R: plot(), hist())
- ggplot(data = ...)
- mapping = aes()
- geom_...()

ggplot(data = blomkvist,
       mapping = aes(x = age,
                     y = rt_hand_d))

ggplot(data = blomkvist,
       mapping = aes(x = age,
                     y = rt_hand_d)) +
  geom_point()
# Quantile-regression lines only.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_quantile()

# Rug marks along the axes only.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_rug()

# Layers stack: points, quantile lines, and rug marks in a single plot.
ggplot(
  data = blomkvist,
  mapping = aes(x = age, y = rt_hand_d)
) +
  geom_point() +
  geom_quantile() +
  geom_rug()
# Baseline: data + aesthetics + geometry.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point()

# Facets: one panel per level of sex.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  facet_grid(~sex)

# Statistics: linear-model fit without the confidence band.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

# Coordinates: log-transformed x-axis and reversed y-axis.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  coord_trans(x = "log", y = "reverse")

# Coordinates: swap the x- and y-axes.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  coord_flip()

# Theme: apply a complete built-in theme.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  theme_dark()

# Theme: change a single element (remove the panel background).
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  theme(panel.background = element_blank())
Open script exercises/2a_grammar_of_graphics.R
Bonus: exercises/2b_grammar_of_graphics.R
# Attribute: a fixed colour set outside aes() applies to every point.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point(colour = "red")

# Aesthetic: colour mapped to a variable inside aes() varies with the data.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point(aes(colour = smoker))

# Colour mapped in the point layer only: the smoother does not inherit it.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point(aes(colour = smoker)) +
  stat_smooth(method = "lm")

# Colour mapped in the smoother layer only: the points do not inherit it.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point() +
  stat_smooth(aes(colour = smoker), method = "lm")

# Colour mapped separately in both layers.
ggplot(blomkvist, aes(x = age, y = rt_hand_d)) +
  geom_point(aes(colour = smoker)) +
  stat_smooth(aes(colour = smoker), method = "lm")

# Colour mapped once in ggplot(): every layer inherits the mapping.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, colour = smoker)) +
  geom_point() +
  stat_smooth(method = "lm")

# Smoker shown by colour; point size is a fixed attribute.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, colour = smoker)) +
  geom_point(size = 2.5)

# Smoker shown by point shape.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, shape = smoker)) +
  geom_point(size = 2.5)

# Smoker shown redundantly by colour and shape.
ggplot(
  blomkvist,
  aes(x = age, y = rt_hand_d, colour = smoker, shape = smoker)
) +
  geom_point(size = 2.5)

# Two variables at once: smoker by colour, sex by shape.
ggplot(
  blomkvist,
  aes(x = age, y = rt_hand_d, colour = smoker, shape = sex)
) +
  geom_point(size = 2.5)
x, y, colour, fill, group
geom_point(): x, y, shape, colour, size, fill, alpha, stroke, group
geom_bar(): x, y, colour, fill, linewidth, linetype, alpha, group
geom_boxplot(): x, y, lower, xlower, upper, xupper, middle, xmiddle, ymin, xmin, ymax, xmax, weight, colour, fill, size, alpha, shape, linetype, linewidth, group
# Jittered points for each smoker group on a shared x-axis.
ggplot(blomkvist, aes(x = smoker, y = rt_hand_d)) +
  geom_jitter()

# Same plot, split into one panel per smoker group with free scales.
ggplot(blomkvist, aes(x = smoker, y = rt_hand_d)) +
  geom_jitter() +
  facet_wrap(~smoker, scales = "free")

# Text labels: the value of sex is printed at each data position.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, label = sex)) +
  geom_text(size = 3)
# Sex shown by point shape.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, shape = sex)) +
  geom_point(size = 3)

# Sex shown by point colour.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, colour = sex)) +
  geom_point(size = 3)

# Sex shown by the colour of the fitted line.
# `se = FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, colour = sex)) +
  stat_smooth(method = "lm", se = FALSE)

# Sex shown by the line type of the fitted line.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, linetype = sex)) +
  stat_smooth(method = "lm", se = FALSE)

# Sex shown by the width of the fitted line. Line thickness is controlled by
# `linewidth`; `size` for lines is deprecated since ggplot2 3.4.0.
ggplot(blomkvist, aes(x = age, y = rt_hand_d, linewidth = sex)) +
  stat_smooth(method = "lm", se = FALSE)
Open script exercises/3a_aesthetics_and_attributes.R
If you have time continue with
- Geometries (geom_) control visual encoding of aesthetics layer
- geom_... are part of ggplot2

 [1] abline area bar bin_2d
 [5] bin2d blank boxplot col
 [9] column contour contour_filled count
[13] crossbar curve density density_2d
[17] density_2d_filled density2d density2d_filled dotplot
[21] errorbar errorbarh freqpoly function
[25] hex histogram hline jitter
[29] label line linerange map
[33] path point pointrange polygon
[37] qq qq_line quantile raster
[41] rect ribbon rug segment
[45] sf sf_label sf_text smooth
[49] spoke step text tile
[53] violin vline

- geoms in other packages such as tidybayes, ggbeeswarm, and ggridges

ggplot(blomkvist, aes(x = rt_hand_d)) + geom_histogram()
# Density curve of the dominant-hand reaction times.
ggplot(blomkvist, aes(x = rt_hand_d)) +
  geom_density()

# Density curve with rug marks added underneath.
ggplot(blomkvist, aes(x = rt_hand_d)) +
  geom_density() +
  geom_rug()
Open script exercises/4a_major_viz_tools.R
Continue with exercises/4b_major_viz_tools.R
labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + labs()

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + labs(title = "My scatter plot")

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  labs(title = "My scatter plot",
       subtitle = "I'm a subtitle")

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + labs(caption = "Caption for data source")

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + labs(tag = "A")

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  labs(x = "Age in years",
       y = "Reaction time in msecs")

labs: title, subtitle, caption, tag, x, y, colour, shape, linetype, fill
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + labs(colour = "Legend\ntitle:")
- theme() or using wrapper functions
- ggplot wrappers:

[1] "theme_bw" "theme_classic" "theme_dark" "theme_grey"
[5] "theme_light" "theme_linedraw" "theme_minimal" "theme_void"

- ggthemes for more themes:

 [1] "theme_base" "theme_calc" "theme_clean"
 [4] "theme_economist" "theme_economist_white" "theme_excel"
 [7] "theme_excel_new" "theme_few" "theme_fivethirtyeight"
[10] "theme_foundation" "theme_gdocs" "theme_hc"
[13] "theme_map" "theme_pander" "theme_par"
[16] "theme_solarized" "theme_solarized_2" "theme_solid"
[19] "theme_stata" "theme_stata_base" "theme_stata_colors"
[22] "theme_tufte" "theme_wsj"

(ggplot2 default)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + facet_grid(~smoker) + theme_grey(base_size = 11)
# Faceted scatterplot with the minimal theme and a larger base font size.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker) +
  theme_minimal(base_size = 14)

# Same plot with the light theme.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker) +
  theme_light(base_size = 14)

# Same plot with the dark theme.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker) +
  theme_dark(base_size = 14)

# Same plot with a theme from the ggthemes package.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker) +
  ggthemes::theme_clean()
# Write a plot to a PNG file; no plot is passed, so ggsave() falls back to its
# default choice of plot. Width and height are both set to 5.
ggsave(
  filename = "name of plot.png",
  width = 5,
  height = 5
)
.eps, .pdf, .svg, .wmf, .png, .jpg, .bmp, .tiff
Open script exercises/5a_bringing_everything_together.R
Continue with exercises/5b_bringing_everything_together.R
ggplot2 CheatSheet

scale_colour_discrete, scale_colour_continuous, scale_colour_manual
colour

ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  scale_colour_discrete(
    labels = c("ex-smoker", "non-smoker", "smoker"))

ggthemes

ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  scale_colour_manual(
    labels = c("ex-smoker", "non-smoker", "smoker"),
    values = c("firebrick", "turquoise2", "cornflowerblue"))

ggthemes

ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  scale_colour_manual(
    labels = c("ex-smoker", "non-smoker", "smoker"),
    values = c("firebrick", "turquoise2", "cornflowerblue"))

ggthemes

# RGB codes of "colorblind" function
mycolours <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# RGB codes of "colorblind" function
scales::show_col(colorblind_pal()(8))

ggthemes

ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  scale_colour_manual(
    labels = c("ex-smoker", "non-smoker", "smoker"),
    values = mycolours[1:3])

ggthemes

ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  scale_colour_colorblind(
    labels = c("ex-smoker", "non-smoker", "smoker"))
# One panel per smoker group; strips show the group value only.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker)

# label_both puts the variable name and the value into each strip label.
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker, labeller = label_both)
# Replace the raw smoker codes with display-ready labels.
blomkvist <- mutate(
  blomkvist,
  smoker = recode(
    smoker,
    former = "Ex-smoker",
    no = "Non-smoker",
    yes = "Smoker"
  )
)

# Running the same recode again leaves the data unchanged: none of the new
# labels matches one of the old codes.
blomkvist <- mutate(
  blomkvist,
  smoker = recode(
    smoker,
    former = "Ex-smoker",
    no = "Non-smoker",
    yes = "Smoker"
  )
)
axis, legend, panel, plot, strip
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme()

axis: axis.text (axis.text.x, axis.text.y), axis.title (axis.title.x, axis.title.y)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme(axis.text = element_text(face = "italic"))

axis: axis.text (axis.text.x, axis.text.y), axis.title (axis.title.x, axis.title.y)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme(axis.title = element_text(face = "bold"))

axis: axis.text (axis.text.x, axis.text.y), axis.title (axis.title.x, axis.title.y)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme(axis.title.y = element_text(face = "bold"))
legend: legend.background, legend.margin, legend.spacing, legend.key, legend.text, legend.title, legend.position, legend.direction, legend.justification, legend.box
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + theme()

legend: legend.background, legend.margin, legend.spacing, legend.key, legend.text, legend.title, legend.position, legend.direction, legend.justification, legend.box
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + theme(legend.position = "top")

legend: legend.background, legend.margin, legend.spacing, legend.key, legend.text, legend.title, legend.position, legend.direction, legend.justification, legend.box
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) +
  geom_point() +
  theme(legend.position = "top",
        legend.justification = "right")

legend: legend.background, legend.margin, legend.spacing, legend.key, legend.text, legend.title, legend.position, legend.direction, legend.justification, legend.box
ggplot(blomkvist, aes(y = rt_hand_d, x = age, colour = smoker)) + geom_point() + theme(legend.position = c(.15,.8))
panel: panel.background, panel.border, panel.spacing, panel.grid (panel.grid.major, panel.grid.minor)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme()

panel: panel.background, panel.border, panel.spacing, panel.grid (panel.grid.major, panel.grid.minor)
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme(panel.background = element_blank())
plot: plot.background, plot.margin, plot.title, plot.subtitle, plot.caption, plot.tag
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme()

plot: plot.background, plot.margin, plot.title, plot.subtitle, plot.caption, plot.tag
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + theme(plot.background = element_rect(fill = "pink"))

plot: plot.background, plot.margin, plot.title, plot.subtitle, plot.caption, plot.tag
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  theme(plot.background = element_rect(fill = "pink"),
        plot.margin = unit(c(2,2,2,2), "cm"))

plot: plot.background, plot.margin, plot.title, plot.subtitle, plot.caption, plot.tag
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + labs(title = "I'm a title") + theme(plot.title = element_text(colour = "pink"))

plot: plot.background, plot.margin, plot.title, plot.subtitle, plot.caption, plot.tag
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + labs(caption = "I'm a caption") + theme(plot.caption = element_text(face = "italic"))
strip.background, strip.placement, strip.text
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + facet_grid(~smoker, labeller = label_both) + theme()

strip.background
strip.background, strip.placement, strip.text
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + facet_grid(~smoker, labeller = label_both) + theme(strip.background = element_blank())

strip.background
strip.background, strip.placement, strip.text
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) + geom_point() + facet_grid(~smoker, labeller = label_both) + theme(strip.background = element_rect(fill = "forestgreen"))

strip.text
strip.background, strip.placement, strip.text
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker, labeller = label_both) +
  theme(strip.background = element_rect(fill = "forestgreen"),
        strip.text = element_text(colour = "white", hjust = 0))

strip.text
strip.background, strip.placement, strip.text
ggplot(blomkvist, aes(y = rt_hand_d, x = age)) +
  geom_point() +
  facet_grid(~smoker, labeller = label_both) +
  theme(strip.background = element_rect(fill = "forestgreen"),
        strip.text = element_text(colour = "white", hjust = 0,
                                  face = "bold", size = 16,
                                  angle = 180))
Andrews, Mark. 2021. Doing Data Science in R: An Introduction for Social Scientists. London, UK: SAGE Publications Ltd.
Anscombe, Francis J. 1973. “Graphs in Statistical Analysis.” The American Statistician 27: 17–21.
Hartwig, Frederick, and Brian E. Dearing. 1979. Exploratory Data Analysis. 16. Sage.
Matejka, Justin, and George Fitzmaurice. 2017. “Same Stats, Different Graphs: Generating Datasets with Varied Appearance and Identical Statistics Through Simulated Annealing.” In Proceedings of the 2017 CHI Conference on Human Factors in Computing Systems, 1290–94.
Tufte, Edward R. 1983. The Visual Display of Quantitative Information. Cheshire, CT: Graphics Press.
———. 1989. The Visual Display of Quantitative Information. Vol. 13–14. Graphic Press.
Tukey, John W. 1977. Exploratory Data Analysis. Vol. 2.
Wickham, Hadley. 2010. “A Layered Grammar of Graphics.” Journal of Computational and Graphical Statistics 19 (1): 3–28.
———. 2016. Ggplot2: Elegant Graphics for Data Analysis. Springer.
Wickham, Hadley, and Garrett Grolemund. 2016. R for Data Science: Import, Tidy, Transform, Visualize, and Model Data. O’Reilly Media, Inc.
Wilkinson, Leland. 1999. The Grammar of Graphics. Springer.
Wong, Bang. 2010. “Points of View: Design of Data Figures.” Nature Methods 7 (9): 665.