list.files()
[1] "gapminderData5.csv" "m15_script.R"
[3] "Strange_m15_quarto_files" "Strange_m15_quarto.html"
[5] "Strange_m15_quarto.qmd" "Strange_m15_quarto.rmarkdown"
06/19/2025
Vivian Strange - GEOG 5680
Deliverable: code + notes
setwd(“~/Desktop/geog5680/Module Deliverables/Module 15”)
list.files()
[1] "gapminderData5.csv" "m15_script.R"
[3] "Strange_m15_quarto_files" "Strange_m15_quarto.html"
[5] "Strange_m15_quarto.qmd" "Strange_m15_quarto.rmarkdown"
library(ggplot2)
library(ggpubr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(ggExtra)
= read.csv("gapminderData5.csv")
gap str(gap)
'data.frame': 1704 obs. of 6 variables:
$ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
$ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ pop : num 8425333 9240934 10267083 11537966 13079460 ...
$ continent: chr "Asia" "Asia" "Asia" "Asia" ...
$ lifeExp : num 28.8 30.3 32 34 36.1 ...
$ gdpPercap: num 779 821 853 836 740 ...
= gap |>
gap07 filter(year == 2007 & continent != "Oceania")
ggplot(gap07, aes(x = gdpPercap, y = lifeExp, col = continent)) +
geom_point() + scale_x_log10("GDP per capita ($)") +
scale_y_continuous("Life Expectancy (yrs)") + ggtitle("GapMinder Data 2007")
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007") +
xscale("log10", .format = TRUE)
Add labels to each point
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007", label = "country", repel = TRUE) +
xscale("log10", .format = TRUE)
Select a handful of points to label
= c("United States", "China", "Germany")
sel_countries ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007", label = "country",
label.select = sel_countries, repel = TRUE) +
xscale("log10", .format = TRUE)
Show the distribution of points using a ‘rug’ (position of each observation)
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007") +
xscale("log10", .format = TRUE)
Marginal histogram
library(ggExtra)
<- ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
p xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007") +
xscale("log10", .format = TRUE)
ggMarginal(p, type = "histogram")
Add a regression line (note: it automatically fits one per color group)
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007", add = "reg.line", conf.int = TRUE) +
xscale("log10", .format = TRUE)
Add correlations
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007", add = "reg.line", conf.int = TRUE) +
xscale("log10", .format = TRUE) +
stat_cor(aes(color = continent), method = "spearman")
Add regression line equations
ggscatter(gap07, x = "gdpPercap", y = "lifeExp", col = "continent",
xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
main = "GapMinder Data 2007", add = "reg.line", conf.int = TRUE) +
xscale("log10", .format = TRUE) +
stat_regline_equation(aes(color = continent))
gghistogram(gap07, x = "lifeExp", main = "GapMinder Life Expectancy")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "npg")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "lancet")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "jco")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "aaas")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
gghistogram(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "rickandmorty")
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
ggdensity(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "jco")
ggdensity(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "jco",
facet.by = "continent")
ggdensity(gap07, x = "lifeExp", fill = "continent",
main = "GapMinder Life Expectancy", palette = "jco",
facet.by = "continent",
add = "median", rug = TRUE)
ggviolin(gap07, x = "continent", y = "lifeExp")
Use the jco palette to add a fill by continent.
Overlay a box and whisker plot for each continent to illustrate where the median and quartiles lie.
Add a jitter function which adds the observed values.
ggviolin(gap07, x = "continent", y = "lifeExp",
fill = "continent", palette = "jco",
add = c("boxplot", "jitter"),
ylab = "Life expectancy (yrs)")
Make the violins horizontal by adding the rotate argument
ggviolin(gap07, x = "continent", y = "lifeExp",
fill = "continent", palette = "jco",
add = c("boxplot", "jitter"),
ylab = "Life expectancy (yrs)",
rotate = TRUE)
ggbarplot(gap07,
x = "country",
y = "lifeExp")
Add a fill by continent.
Rotate the country labels and reduce the font size.
Add axis labels.
ggbarplot(gap07,
x = "country",
y = "lifeExp",
fill = "continent",
palette = "jco",
x.text.angle = 90,
ylab = "Life expectancy (yrs)",
xlab = "Country") +
font("x.text", size = 4)
Order the bars by value using sort.val()
Note: if you set the sort.by.groups argument to true, this will first group all countries from a continent together, then sort them.
ggbarplot(gap07,
x = "country",
y = "lifeExp",
fill = "continent",
palette = "jco",
sort.val = "desc",
sort.by.groups = FALSE,
x.text.angle = 90,
ylab = "Life expectancy (yrs)",
xlab = "Country") +
font("x.text", size = 4)
Note: adding segments forces the origin to zero
ggdotchart(gap07,
x = "country",
y = "lifeExp",
color = "continent",
palette = "jco",
sorting = "descending",
rotate = TRUE,
group = "continent",
add = "segments",
ylab = "Life expectancy (yrs)",
xlab = "Country") +
font("y.text", size = 4)
= gap %>%
gap_sub filter(continent %in% c("Asia", "Africa"),
%in% c(1957, 1982, 2007)) year
ggboxplot(gap_sub, x = "continent", y = "lifeExp",
ylab = "Years", col = "continent", add = "jitter")
Use stat_compare_means to test if the life expectancy differs between the two continents.
The height at which the comparison results will be placed is specified by the label.y argument
ggboxplot(gap_sub, x = "continent", y = "lifeExp",
ylab = "Years", col = "continent", add = "jitter") +
stat_compare_means(label.y = 90)
The Wilcoxon rank sum test is the default test for two groups.
Low P-value: the means are different between the two continents.
Elect t-test: specified by argument, method = “t.test”
ggboxplot(gap_sub, x = "continent", y = "lifeExp",
ylab = "Years", col = "continent", add = "jitter") +
stat_compare_means(method = "t.test", label.y = 90)
Facet boxplots by year: 3 separate p-values allow for comparison.
ggboxplot(gap_sub, x = "continent", y = "lifeExp",
ylab = "Years", col = "continent", add = "jitter", facet.by = "year") +
stat_compare_means(method = "t.test", label.y = 90)
ggboxplot(gap_sub, x = "year", y = "lifeExp") +
stat_compare_means(label.y = 80, method = "anova")
= list( c('1957', '1982'),
comps c('1957', '2007'),
c('1982', '2007'))
comps
[[1]]
[1] "1957" "1982"
[[2]]
[1] "1957" "2007"
[[3]]
[1] "1982" "2007"
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
stat_compare_means(method = "t.test", comparisons = comps,
bracket.size = .6, size = 4)
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
stat_compare_means(method = "t.test", comparisons = comps,
bracket.size = .6, size = 4) +
stat_compare_means(label.y = 110, method = "anova")
Add a facet.by argument to the ggboxplot function
Note: between 1982 and 2007, there has not been a significant increase in African life expectancy
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years", facet.by = "continent") +
stat_compare_means(method = "t.test", comparisons = comps,
bracket.size = .6, size = 4) +
stat_compare_means(label.y = 110, method = "anova")
Replace the list of comparisons in the first stat_compare_means() function with a reference groups (here, the year 1957):
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years", facet.by = "continent") +
stat_compare_means(method = "t.test", ref.group = "1957") +
stat_compare_means(label.y = 110, method = "anova")
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years", facet.by = "continent") +
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = "1957") +
stat_compare_means(label.y = 110, method = "anova")