ggpubr figures
# Confirm the working directory and list its contents, then attach the
# packages used below: ggplot2 and ggpubr for plotting, dplyr for filtering.
# Lines that start with "## " are console output captured when the script ran.
getwd()
## [1] "C:/GEOG 5680/module15"
list.files()
## [1] "gapminderData5.csv" "GEOG_5680_15a_ggpubr.html"
## [3] "module15" "r_markdown.Rmd"
library(ggplot2)
library(ggpubr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the GapMinder table, inspect its structure, then keep only the 2007
# rows for every continent except Oceania.
gap <- read.csv(file = "gapminderData5.csv")
str(gap)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Comma-separated conditions in filter() are combined with a logical AND.
gap07 <- filter(gap, year == 2007, continent != "Oceania")
Scatter plots
# Baseline scatter plot built directly with ggplot2: life expectancy against
# GDP per capita on a log10 x axis, one point per row, colored by continent.
ggplot(
  data = gap07,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)
) +
  geom_point() +
  scale_x_log10(name = "GDP per capita ($)") +
  scale_y_continuous(name = "Life Expectancy (yrs)") +
  ggtitle(label = "GapMinder Data 2007")
# First ggpubr function: ggscatter() takes the column names as strings.
# `color` is written in full rather than relying on partial argument matching.
# xscale("log10", .format = TRUE) puts the x axis on a log10 scale and formats
# the tick labels.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)
# Label every point with its country name; repel = TRUE asks ggpubr to move
# the labels apart so they do not overprint each other.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country", repel = TRUE
) +
  xscale("log10", .format = TRUE)
# Label only a handful of points: label.select restricts the country labels
# to the names listed in sel_countries.
# `color` is written in full rather than relying on partial argument matching.
sel_countries <- c("United States", "China", "Germany")
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country", label.select = sel_countries, repel = TRUE
) +
  xscale("log10", .format = TRUE)
# The distribution of points can be shown using a 'rug': tick marks along the
# axes at the position of each observation. rug = TRUE is what switches the
# rug on; without it this call draws the same plot as the basic scatter.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  rug = TRUE
) +
  xscale("log10", .format = TRUE)
# Alternative to the rug: marginal histograms. The scatter plot is stored in
# `p` and handed to ggExtra::ggMarginal(), which draws a histogram of each
# variable in the plot margins.
# `color` is written in full rather than relying on partial argument matching.
library(ggExtra)
p <- ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)
ggMarginal(p, type = "histogram")
# Add a regression line (add = "reg.line") with its confidence band
# (conf.int = TRUE). Because the points are colored by continent, a line is
# fitted for each continent.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE)
# Add correlation coefficients: stat_cor() prints one Spearman correlation
# per continent, colored to match that continent's points.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE) +
  stat_cor(aes(color = continent), method = "spearman")
# Add the regression line equations: stat_regline_equation() prints one
# equation per continent, colored to match that continent's points.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE) +
  stat_regline_equation(aes(color = continent))
Histograms
# Histogram of the 2007 life expectancy values across all countries.
gghistogram(
  data = gap07,
  x = "lifeExp",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
# Same histogram with the bars filled by continent, so the continents can be
# told apart.
gghistogram(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
Palettes
# Fill colors taken from the "npg" palette.
gghistogram(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "npg",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
# Same plot with a different palette, "lancet".
gghistogram(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "lancet",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
Density plots
# Density curves of life expectancy, one per continent, overlaid in a single
# panel and filled with the "jco" palette.
ggdensity(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "jco",
  main = "GapMinder Life Expectancy"
)
# facet.by splits the plot into one panel per continent.
ggdensity(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "jco",
  facet.by = "continent",
  main = "GapMinder Life Expectancy"
)
# Faceted version again, now with a median line (add = "median") and a rug
# marking each observation (rug = TRUE).
ggdensity(
  data = gap07,
  x = "lifeExp",
  fill = "continent",
  palette = "jco",
  facet.by = "continent",
  add = "median",
  rug = TRUE,
  main = "GapMinder Life Expectancy"
)
Violin plots
# Plain violin plot of life expectancy for each continent.
ggviolin(data = gap07, x = "continent", y = "lifeExp")
# Improved version: fill by continent with the "jco" palette, and overlay a
# box plot plus the jittered observations on each violin.
ggviolin(
  data = gap07,
  x = "continent",
  y = "lifeExp",
  ylab = "Life expectancy (yrs)",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter")
)
# Same plot with rotate = TRUE, which draws the violins horizontally.
ggviolin(
  data = gap07,
  x = "continent",
  y = "lifeExp",
  ylab = "Life expectancy (yrs)",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter"),
  rotate = TRUE
)
Barplots
# Plain bar plot: one bar per country, bar height is life expectancy.
ggbarplot(data = gap07, x = "country", y = "lifeExp")
# Some bar plot modifications: fill by continent ("jco" palette), turn the
# country labels through 90 degrees and shrink them so they fit.
ggbarplot(
  data = gap07, x = "country", y = "lifeExp",
  fill = "continent", palette = "jco",
  xlab = "Country", ylab = "Life expectancy (yrs)",
  x.text.angle = 90
) +
  font("x.text", size = 4)
# Now order the bars by value, highest first. sort.by.groups = FALSE sorts
# across all countries instead of within each continent.
ggbarplot(
  data = gap07, x = "country", y = "lifeExp",
  fill = "continent", palette = "jco",
  xlab = "Country", ylab = "Life expectancy (yrs)",
  sort.val = "desc", sort.by.groups = FALSE,
  x.text.angle = 90
) +
  font("x.text", size = 4)
# Dot chart of life expectancy by country, grouped by continent and sorted
# in descending order. rotate = TRUE puts the countries on the vertical axis,
# and add = "segments" draws a line segment to each dot.
ggdotchart(
  data = gap07, x = "country", y = "lifeExp",
  color = "continent", palette = "jco",
  group = "continent", sorting = "descending",
  add = "segments", rotate = TRUE,
  xlab = "Country", ylab = "Life expectancy (yrs)"
) +
  font("y.text", size = 4)
Adding Comparisons to Plots
# Subset with only the African and Asian countries, for three of the years
# (1957, 1982 and 2007).
gap_sub <- filter(
  gap,
  continent %in% c("Asia", "Africa"),
  year %in% c(1957, 1982, 2007)
)
# Box plot of life expectancy for the two continents, with the individual
# observations jittered on top.
# In every call below `color` is written in full rather than relying on
# partial argument matching.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp", color = "continent",
  ylab = "Years", add = "jitter"
)
# Add a test comparing the two continents. No method is given, so
# stat_compare_means() uses its default (documented as the Wilcoxon test for
# two groups). label.y = 90 places the result at y = 90.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp", color = "continent",
  ylab = "Years", add = "jitter"
) +
  stat_compare_means(label.y = 90)
# Same comparison using a t-test instead.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp", color = "continent",
  ylab = "Years", add = "jitter"
) +
  stat_compare_means(method = "t.test", label.y = 90)
# Facet the box plot by year, so the t-test is run within each year's panel.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp", color = "continent",
  ylab = "Years", add = "jitter", facet.by = "year"
) +
  stat_compare_means(method = "t.test", label.y = 90)
Multiple Groups
# Compare more than two groups: life expectancy across the three years,
# tested with a one-way ANOVA. The result is printed at y = 80.
ggboxplot(data = gap_sub, x = "year", y = "lifeExp") +
  stat_compare_means(method = "anova", label.y = 80)
# Build the list of pairwise comparisons: every pair of the three years,
# written as character vectors.
comps <- list(
  c("1957", "1982"),
  c("1957", "2007"),
  c("1982", "2007")
)
comps
## [[1]]
## [1] "1957" "1982"
##
## [[2]]
## [1] "1957" "2007"
##
## [[3]]
## [1] "1982" "2007"
# Box plot across years with a t-test for each pair listed in `comps`,
# drawn as brackets between the boxes.
ggboxplot(data = gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    comparisons = comps, method = "t.test",
    bracket.size = .6, size = 4
  )
# Add a second stat_compare_means() layer to include the original overall
# ANOVA comparison, printed at y = 110.
ggboxplot(data = gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    comparisons = comps, method = "t.test",
    bracket.size = .6, size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)
# Add facet.by to look at the comparisons individually for the two
# continents.
ggboxplot(
  data = gap_sub, x = "year", y = "lifeExp",
  ylab = "Years", facet.by = "continent"
) +
  stat_compare_means(
    comparisons = comps, method = "t.test",
    bracket.size = .6, size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)
# Alternative to the pairwise comparisons: test every year against a single
# reference group (1957).
ggboxplot(
  data = gap_sub, x = "year", y = "lifeExp",
  ylab = "Years", facet.by = "continent"
) +
  stat_compare_means(method = "t.test", ref.group = "1957") +
  stat_compare_means(method = "anova", label.y = 110)
# To get rid of the overlap between test results, show the reference-group
# comparisons as significance symbols (label = "p.signif") instead of text.
ggboxplot(
  data = gap_sub, x = "year", y = "lifeExp",
  ylab = "Years", facet.by = "continent"
) +
  stat_compare_means(
    method = "t.test", ref.group = "1957",
    label = "p.signif"
  ) +
  stat_compare_means(method = "anova", label.y = 110)
# Scatter plot with no color grouping, so add = "reg.line" fits a single
# regression line through all of the 2007 points.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line"
) +
  xscale("log10", .format = TRUE)
# Scatter plot colored by continent, with no fitted line.
# `color` is written in full rather than relying on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)