#Load ggplot2, ggpubr and dplyr, then read in the data file
library(ggplot2)
library(ggpubr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the GapMinder table from the CSV file (path is relative to the
# working directory) and print a compact summary of its columns.
gap <- read.csv(file = "gapminderData5.csv")
str(object = gap)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Keep only the 2007 rows and drop Oceania. Comma-separated conditions in
# filter() are combined with a logical AND.
gap07 <- gap %>%
  filter(year == 2007, continent != "Oceania")
#Scatter plot
# Life expectancy against GDP per capita (log10 x-axis), one colour per
# continent, built directly with ggplot2.
ggplot(gap07, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10(name = "GDP per capita ($)") +
  scale_y_continuous(name = "Life Expectancy (yrs)") +
  ggtitle("GapMinder Data 2007")
Now the same with ggpubr.
# The same scatter plot through ggpubr, with a log10 x-axis added by
# xscale(). The colour argument is spelled out in full (`color`) instead of
# relying on R's partial matching of `col`.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)
#Histograms
# Histogram of life expectancy over all countries in gap07; no `bins` value
# is supplied, so gghistogram falls back to its default.
gghistogram(
  gap07,
  x = "lifeExp",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
Separate continents:
# Same histogram, with the bars filled according to continent.
gghistogram(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
#Palettes
# The per-continent histogram drawn three times, changing only the named
# colour palette passed through `palette`.
gghistogram(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "npg"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
gghistogram(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "lancet"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
gghistogram(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "rickandmorty"
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
#Density plots
# Density curves of life expectancy, filled by continent with the "jco"
# palette.
ggdensity(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco"
)

# One panel per continent. The argument name is `palette`; the previous
# misspelling (`paelette`) meant the jco palette was never applied here.
ggdensity(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent"
)

# Faceted again, adding a median line and a rug of the observations.
ggdensity(
  gap07,
  x = "lifeExp", fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent",
  add = "median",
  rug = TRUE
)
#Violin plots
# Plain violin plot of life expectancy for each continent.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp"
)

# Filled by continent, with a box plot and jittered points added on top.
ggviolin(
  gap07,
  x = "continent", y = "lifeExp",
  fill = "continent", palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)"
)
Add rotate so violins are on their side.
# Same decorated violin plot, flipped with `rotate = TRUE`.
ggviolin(
  gap07,
  x = "continent", y = "lifeExp",
  fill = "continent", palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)",
  rotate = TRUE
)
#Bar plots
# One bar per country showing its 2007 life expectancy, default styling.
ggbarplot(
  gap07,
  x = "country",
  y = "lifeExp"
)
Some adjustments:
# Rotated bars in a single fill colour, one panel per continent, with the
# country labels shrunk so they remain legible.
ggbarplot(
  gap07,
  x = "country", y = "lifeExp",
  fill = "dark orange",
  facet.by = "continent",
  palette = "jco",
  x.text.angle = 90,
  ylab = "Life expectancy (yrs)", xlab = "Country",
  rotate = TRUE
) +
  font("x.text", size = 4)

# Bars coloured by continent and sorted by value in descending order across
# all countries (sort.by.groups = FALSE). The argument is `sort.val`; R is
# case-sensitive, so the earlier `Sort.val` spelling did not request the sort.
ggbarplot(
  gap07,
  x = "country", y = "lifeExp",
  fill = "continent",
  palette = "jco",
  sort.val = "desc",
  sort.by.groups = FALSE,
  x.text.angle = 90,
  ylab = "Life expectancy (yrs)", xlab = "Country"
) +
  font("x.text", size = 4)
An alternative is dot plots:
# Dot chart of life expectancy per country: coloured and grouped by
# continent, sorted in descending order, rotated, with segments added to
# the dots.
ggdotchart(
  gap07,
  x = "country", y = "lifeExp",
  color = "continent",
  palette = "jco",
  sorting = "descending",
  rotate = TRUE,
  group = "continent",
  add = "segments",
  ylab = "Life expectancy (yrs)", xlab = "Country"
) +
  font("y.text", size = 4)
# Subset used for the group comparisons below: Asia and Africa only, for the
# years 1957, 1982 and 2007.
gap_sub <- gap %>%
  filter(
    continent %in% c("Asia", "Africa"),
    year %in% c(1957, 1982, 2007)
  )
Here’s another one, a box plot of life expectancy by continent:
# Box plots of life expectancy by continent with jittered points. The
# outline colour argument is written out as `color` instead of relying on
# partial matching of `col`.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
)

# Same plot with a t-test comparison label placed at y = 90.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
) +
  stat_compare_means(method = "t.test", label.y = 90)

# Repeated call, kept as in the original walkthrough.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
) +
  stat_compare_means(method = "t.test", label.y = 90)

# Faceted by year, with the same t-test comparison added.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter",
  facet.by = "year"
) +
  stat_compare_means(method = "t.test", label.y = 90)
#Multiple groups
# Life expectancy by year, with an ANOVA comparison label placed at y = 80.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp"
) +
  stat_compare_means(
    method = "anova",
    label.y = 80
  )
# Every pair of years to compare, as a list of two-element character
# vectors: ("1957", "1982"), ("1957", "2007"), ("1982", "2007").
comps <- utils::combn(c("1957", "1982", "2007"), 2, simplify = FALSE)
comps
## [[1]]
## [1] "1957" "1982"
##
## [[2]]
## [1] "1957" "2007"
##
## [[3]]
## [1] "1982" "2007"
Another boxplot:
# Box plots by year with pairwise t-test comparisons for every pair in
# `comps`. The repeated calls are kept as in the original walkthrough.
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  )

ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  )

# Pairwise comparisons plus an ANOVA label at y = 110.
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)

ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  )

ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)
# One panel per continent: pairwise t-tests between years plus an ANOVA
# label at y = 110.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(
    method = "t.test", comparisons = comps,
    bracket.size = 0.6, size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)

# Compare each year against the 1957 reference group instead of all pairs.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(method = "t.test", ref.group = "1957") +
  stat_compare_means(method = "anova", label.y = 110)

# Same reference-group comparison, labelled with `label = "p.signif"`.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(
    method = "t.test", ref.group = "1957",
    label = "p.signif"
  ) +
  stat_compare_means(method = "anova", label.y = 110)