Module 15a Exercise

Author

u1535008

Loading the packages needed.

library(ggplot2)
library(ggpubr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Loading the dataset.

# Read the GapMinder CSV (path is relative to the working directory) and
# print a compact overview of the resulting data frame's columns.
gap <- read.csv(file = "gapminderData5.csv")
str(object = gap)
'data.frame':   1704 obs. of  6 variables:
 $ country  : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
 $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
 $ pop      : num  8425333 9240934 10267083 11537966 13079460 ...
 $ continent: chr  "Asia" "Asia" "Asia" "Asia" ...
 $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
 $ gdpPercap: num  779 821 853 836 740 ...

Filtering to the year 2007 and excluding Oceania

# Subset used by most plots below: rows from 2007 only, with Oceania removed.
# Separate conditions passed to filter() are combined with a logical AND.
gap07 <- dplyr::filter(gap, year == 2007, continent != "Oceania")

Making some scatterplots.

# Life expectancy against GDP per capita (log10 x-axis), coloured by
# continent, built directly with ggplot2.
ggplot(gap07, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10("GDP per capita ($)") +
  scale_y_continuous("Life Expectancy (yrs)") +
  ggtitle("GapMinder Data 2007")

# Comparable figure built with ggpubr::ggscatter(). The `color` argument is
# spelled out in full instead of relying on partial matching of `col`.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)

Adding labels to all points or to selected points

# Label every point with its country name; repel = TRUE is requested so the
# labels are pushed apart rather than drawn on top of each other.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life Expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country", repel = TRUE
) +
  xscale("log10", .format = TRUE)

# Label only a hand-picked set of countries via label.select.
sel_countries <- c("United States", "China", "Germany")
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country", label.select = sel_countries, repel = TRUE
) +
  xscale("log10", .format = TRUE)

# Unlabelled scatter for comparison with the labelled versions.
# NOTE(review): the title previously read "GapMinder Data = 2007"; it is
# aligned here with the title used by every other plot, on the assumption
# that the "=" was a typo. Confirm.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)

Creating a marginal histogram

library(ggExtra)
# Build the base scatter first and keep it in `p`, because ggMarginal()
# takes an existing plot object and adds marginal histograms to it.
p <- ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  xscale("log10", .format = TRUE)
ggMarginal(p, type = "histogram")

Adding a regression line

# Scatter with a fitted regression line (add = "reg.line") and its
# confidence interval (conf.int = TRUE).
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE)

Adding correlations

# Regression scatter annotated with Spearman correlation labels; the labels
# are mapped to continent colour so each one matches its group.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE) +
  stat_cor(aes(color = continent), method = "spearman")

Adding the regression line equations

# Regression scatter annotated with the fitted line equations, coloured by
# continent to match the points.
ggscatter(
  gap07,
  x = "gdpPercap", y = "lifeExp", color = "continent",
  xlab = "GDP per capita ($)", ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line", conf.int = TRUE
) +
  xscale("log10", .format = TRUE) +
  stat_regline_equation(aes(color = continent))

Generating histograms

# Histograms of 2007 life expectancy.
# `bins = 30` is passed explicitly in every call: it is the value
# gghistogram() reported falling back to, so the binning is unchanged, but
# the "Using `bins = 30` by default" warning is no longer triggered.
gghistogram(
  gap07,
  x = "lifeExp", bins = 30,
  main = "GapMinder Life Expectancy"
)

# Bars filled by continent.
gghistogram(
  gap07,
  x = "lifeExp", fill = "continent", bins = 30,
  main = "GapMinder Life Expectancy"
)

# Filled by continent, using the "jco" palette.
gghistogram(
  gap07,
  x = "lifeExp", fill = "continent", bins = 30,
  main = "GapMinder Life Expectancy", palette = "jco"
)

Generating density plots

# Density of life expectancy, filled by continent, all on one panel.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco"
)

# Same densities, split into one facet per continent.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent"
)

# Faceted densities with a median marker (add = "median") and a rug.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent",
  add = "median",
  rug = TRUE
)

Generating violin plots

# Basic violin plot: life expectancy by continent.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp"
)

# Filled violins with a boxplot and jittered points overlaid.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)"
)

# Same plot drawn with rotate = TRUE.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)",
  rotate = TRUE
)

Generating bar plots

# One bar per country showing 2007 life expectancy.
ggbarplot(gap07, x = "country", y = "lifeExp")

# Bars filled by continent with rotated, shrunken x-axis labels so the
# country names stay legible.
# The argument is `palette`; it was previously misspelled `pallete`, so it
# was not recognised as ggbarplot()'s palette argument.
ggbarplot(
  gap07,
  x = "country",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  x.text.angle = 90,
  ylab = "Life expectancy (yrs)",
  xlab = "Country"
) +
  font("x.text", size = 4)

# Same plot with bars sorted by value in descending order across all
# countries (sort.by.groups = FALSE), not within each continent.
ggbarplot(
  gap07,
  x = "country",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  sort.val = "desc",
  sort.by.groups = FALSE,
  x.text.angle = 90,
  ylab = "Life expectancy (yrs)",
  xlab = "Country"
) +
  font("x.text", size = 4)

Generating dot plots

# Dot chart of life expectancy per country: coloured and grouped by
# continent, sorted descending, rotated, with segments added to the dots.
# The y-axis text is shrunk so the country names fit.
ggdotchart(
  gap07,
  x = "country", y = "lifeExp",
  color = "continent", palette = "jco",
  sorting = "descending", rotate = TRUE,
  group = "continent", add = "segments",
  ylab = "Life expectancy (yrs)", xlab = "Country"
) +
  font("y.text", size = 4)

Adding comparisons to plots

# Subset for the group comparisons: Asia and Africa only, at three years.
gap_sub <- gap |>
  filter(
    continent %in% c("Asia", "Africa"),
    year %in% c(1957, 1982, 2007)
  )

# Boxplots of life expectancy by continent with jittered points.
# `color` is spelled out instead of relying on partial matching of `col`.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years", color = "continent", add = "jitter"
)

# Same boxplots with stat_compare_means() at its default settings, its label
# placed at y = 90.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years", color = "continent", add = "jitter"
) +
  stat_compare_means(label.y = 90)

Doing a t-test instead

# Continent comparison using a t-test, with the label placed at y = 90.
# `color` is spelled out instead of relying on partial matching of `col`.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years", color = "continent", add = "jitter"
) +
  stat_compare_means(method = "t.test", label.y = 90)

# Same comparison, faceted so each year gets its own panel.
ggboxplot(
  gap_sub,
  x = "continent", y = "lifeExp",
  ylab = "Years", color = "continent", add = "jitter",
  facet.by = "year"
) +
  stat_compare_means(method = "t.test", label.y = 90)

Comparing multiple groups

# Life expectancy by year, with an ANOVA comparison labelled at y = 80.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp"
) +
  stat_compare_means(method = "anova", label.y = 80)

# Pairs of years to compare; passed as `comparisons` to stat_compare_means()
# in the plots that follow. Years are given as strings.
comps <- list(
  c("1957", "1982"),
  c("1957", "2007"),
  c("1982", "2007")
)
comps
[[1]]
[1] "1957" "1982"

[[2]]
[1] "1957" "2007"

[[3]]
[1] "1982" "2007"
# Pairwise t-tests between the year pairs listed in `comps`.
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test",
    comparisons = comps,
    bracket.size = .6,
    size = 4
  )

# Pairwise t-tests plus an ANOVA label placed at y = 110.
ggboxplot(gap_sub, x = "year", y = "lifeExp", ylab = "Years") +
  stat_compare_means(
    method = "t.test",
    comparisons = comps,
    bracket.size = .6,
    size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)

# Same layers, faceted by continent.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(
    method = "t.test",
    comparisons = comps,
    bracket.size = .6,
    size = 4
  ) +
  stat_compare_means(method = "anova", label.y = 110)

# Faceted by continent, t-tests run with 1957 as the reference group
# instead of the explicit pair list.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(method = "t.test", ref.group = "1957") +
  stat_compare_means(method = "anova", label.y = 110)

# As above, with label = "p.signif" for the reference-group t-tests.
ggboxplot(
  gap_sub,
  x = "year", y = "lifeExp",
  ylab = "Years",
  facet.by = "continent"
) +
  stat_compare_means(
    label = "p.signif",
    method = "t.test",
    ref.group = "1957"
  ) +
  stat_compare_means(method = "anova", label.y = 110)