More ggPlot

Adrian Cisneros

2022-10-28

Adrian Cisneros

#MoreGGplot assignment

library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
#> ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
#> ✔ tibble  3.1.8      ✔ dplyr   1.0.10
#> ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
#> ✔ readr   2.1.3      ✔ forcats 0.5.2 
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
library(gapminder)
library(socviz)
# Bar chart of respondent counts per region; geom_bar() tallies the rows
# for each value of bigregion.
p <- gss_sm %>%
  ggplot(aes(x = bigregion))
p + geom_bar()

# This is the first plot we are asked to reproduce from the example.

# Ask geom_bar() for proportions instead of counts.
# after_stat(prop) is the current spelling of the older ..prop.. notation.
# No group aesthetic is set here, so the proportion is computed within each
# bar separately and every bar comes out at 1.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion))
p + geom_bar(mapping = aes(y = after_stat(prop)))

# This is the second graph.

# Proportions across regions: group = 1 puts all rows in a single group, so
# each bar's height is its share of the whole sample.
# after_stat(prop) replaces the older ..prop.. notation.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion))
p + geom_bar(mapping = aes(y = after_stat(prop), group = 1))

# And, last but not least, the third plot.

Histograms and Density Plots

We then move on to make histograms and density plots:
# Histogram of county area using the default number of bins.
p <- midwest %>%
  ggplot(aes(x = area))
p + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same variable, with the bin count set explicitly to 10.
p <- midwest %>%
  ggplot(aes(x = area))
p + geom_histogram(bins = 10)

# States to compare.
oh_wi <- c("OH", "WI")

# Overlaid, semi-transparent histograms of percollege for the two states.
p <- midwest %>%
  filter(state %in% oh_wi) %>%
  ggplot(aes(x = percollege, fill = state))
p + geom_histogram(alpha = 0.4, bins = 20)

# Density estimate of county area for the whole data set.
p <- midwest %>%
  ggplot(aes(x = area))
p + geom_density()

# One semi-transparent density curve per state, with matching fill and outline.
p <- midwest %>%
  ggplot(aes(x = area, fill = state, color = state))
p + geom_density(alpha = 0.3)

# Inspect the pre-summarized Titanic table: one row per fate/sex pair.
titanic
#>       fate    sex    n percent
#> 1 perished   male 1364    62.0
#> 2 perished female  126     5.7
#> 3 survived   male  367    16.7
#> 4 survived female  344    15.6
# percent is already computed, so draw the values as they are, side by side,
# with the legend placed above the panel.
p <- titanic %>%
  ggplot(aes(x = fate, y = percent, fill = sex))
p +
  geom_col(position = "dodge") +
  theme(legend.position = "top")

# Inspect the yearly US vs. OECD summary table.
oecd_sum
#> # A tibble: 57 × 5
#> # Groups:   year [57]
#>     year other   usa  diff hi_lo
#>    <int> <dbl> <dbl> <dbl> <chr>
#>  1  1960  68.6  69.9 1.30  Below
#>  2  1961  69.2  70.4 1.20  Below
#>  3  1962  68.9  70.2 1.30  Below
#>  4  1963  69.1  70   0.900 Below
#>  5  1964  69.5  70.3 0.800 Below
#>  6  1965  69.6  70.3 0.700 Below
#>  7  1966  69.9  70.3 0.400 Below
#>  8  1967  70.1  70.7 0.600 Below
#>  9  1968  70.1  70.4 0.300 Below
#> 10  1969  70.1  70.6 0.5   Below
#> # … with 47 more rows
# One column per year showing diff, colored by hi_lo.
# The fill legend is suppressed with guides(fill = "none"); the older
# guides(fill = FALSE) form is deprecated and raised a warning.
p <- ggplot(data = oecd_sum,
            mapping = aes(x = year, y = diff, fill = hi_lo))
p + geom_col() + guides(fill = "none") +
  labs(x = NULL, y = "Difference in Years",
       title = "The US Life Expectancy Gap",
       subtitle = "Difference between US and OECD average life expectancies, 1960-2015",
       caption = "Data: OECD. After a chart by Christopher Ingraham,
                  Washington Post, December 27th 2017.")
#> Warning: Removed 1 rows containing missing values (position_stack).

Frequency Plots the Awkward Way: Don’t do this any more than absolutely necessary

# Raw counts of each religion category.
table(religion = gss_sm$religion)
#> religion
#> Protestant   Catholic     Jewish       None      Other 
#>       1371        649         51        619        159
# Mapping religion to color only changes the bar outlines.
p <- gss_sm %>%
  ggplot(aes(x = religion, color = religion))
p + geom_bar()

# Fill the bars by religion and drop the redundant legend.
# guides(fill = "none") replaces the deprecated guides(fill = FALSE), which
# raised a deprecation warning.
p <- ggplot(data = gss_sm,
            mapping = aes(x = religion, fill = religion))
p + geom_bar() + guides(fill = "none")

# Stacked counts: one bar per region, segments filled by religion.
p <- gss_sm %>%
  ggplot(aes(x = bigregion, fill = religion))
p + geom_bar()

# Side-by-side bars with proportions on the y axis.
# after_stat(prop) replaces the older ..prop.. notation.
# No explicit group is given, so this is one of the "awkward" versions: the
# proportion is computed within each region/religion combination.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion,
                          fill = religion))
p + geom_bar(position = "dodge",
             mapping = aes(y = after_stat(prop)))

# Side-by-side bars with proportions computed per religion group.
# after_stat(prop) replaces the older ..prop.. notation.
# With group = religion the proportions sum to 1 within each religion across
# regions, not within each region.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion,
                          fill = religion))
p + geom_bar(position = "dodge",
             mapping = aes(y = after_stat(prop),
                           group = religion))

Dplyr pipelines: Much better

# Count respondents for each region/religion pair, then convert the counts to
# within-region shares. The result stays grouped by bigregion, so sum(n) is
# the regional total.
rel_by_region <- gss_sm %>%
  group_by(bigregion, religion) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(
    freq = n / sum(n),
    pct = round(freq * 100, 1)
  )
rel_by_region
#> # A tibble: 24 × 5
#> # Groups:   bigregion [4]
#>    bigregion religion       n    freq   pct
#>    <fct>     <fct>      <int>   <dbl> <dbl>
#>  1 Northeast Protestant   158 0.324    32.4
#>  2 Northeast Catholic     162 0.332    33.2
#>  3 Northeast Jewish        27 0.0553    5.5
#>  4 Northeast None         112 0.230    23  
#>  5 Northeast Other         28 0.0574    5.7
#>  6 Northeast <NA>           1 0.00205   0.2
#>  7 Midwest   Protestant   325 0.468    46.8
#>  8 Midwest   Catholic     172 0.247    24.7
#>  9 Midwest   Jewish         3 0.00432   0.4
#> 10 Midwest   None         157 0.226    22.6
#> # … with 14 more rows

Avoiding Legends

# Horizontal bars of percent by religion, one panel per region. The facet
# strips and y axis already label everything, so the fill legend is dropped.
# guides(fill = "none") replaces the deprecated guides(fill = FALSE), which
# raised a deprecation warning.
p <- ggplot(data = rel_by_region,
            mapping = aes(x = pct, y = religion, fill = religion))
p + geom_col() +
    labs(x = "Percent", y = NULL) +
    guides(fill = "none") +
    facet_wrap(~ bigregion, nrow = 1)

n(), tally(), and count()

# Counting, version 1: group explicitly, then summarize with n().
summarize(
  group_by(gss_sm, bigregion, religion),
  n = n()
)
#> `summarise()` has grouped output by 'bigregion'. You can override using the
#> `.groups` argument.
#> # A tibble: 24 × 3
#> # Groups:   bigregion [4]
#>    bigregion religion       n
#>    <fct>     <fct>      <int>
#>  1 Northeast Protestant   158
#>  2 Northeast Catholic     162
#>  3 Northeast Jewish        27
#>  4 Northeast None         112
#>  5 Northeast Other         28
#>  6 Northeast <NA>           1
#>  7 Midwest   Protestant   325
#>  8 Midwest   Catholic     172
#>  9 Midwest   Jewish         3
#> 10 Midwest   None         157
#> # … with 14 more rows
# Counting, version 2: group explicitly, then let tally() count the rows.
tally(group_by(gss_sm, bigregion, religion))
#> # A tibble: 24 × 3
#> # Groups:   bigregion [4]
#>    bigregion religion       n
#>    <fct>     <fct>      <int>
#>  1 Northeast Protestant   158
#>  2 Northeast Catholic     162
#>  3 Northeast Jewish        27
#>  4 Northeast None         112
#>  5 Northeast Other         28
#>  6 Northeast <NA>           1
#>  7 Midwest   Protestant   325
#>  8 Midwest   Catholic     172
#>  9 Midwest   Jewish         3
#> 10 Midwest   None         157
#> # … with 14 more rows
# Counting, version 3: count() groups and tallies in one step and returns an
# ungrouped table.
count(gss_sm, bigregion, religion)
#> # A tibble: 24 × 3
#>    bigregion religion       n
#>    <fct>     <fct>      <int>
#>  1 Northeast Protestant   158
#>  2 Northeast Catholic     162
#>  3 Northeast Jewish        27
#>  4 Northeast None         112
#>  5 Northeast Other         28
#>  6 Northeast <NA>           1
#>  7 Midwest   Protestant   325
#>  8 Midwest   Catholic     172
#>  9 Midwest   Jewish         3
#> 10 Midwest   None         157
#> # … with 14 more rows

Kinds of Facets

# Age against number of children, with a smoother, one panel per race.
p <- gss_sm %>%
  ggplot(aes(x = age, y = childs))
p +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_wrap(vars(race))
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

# Same plot, with one panel per sex/race combination laid out in a single row.
p <- gss_sm %>%
  ggplot(aes(x = age, y = childs))
p +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_wrap(vars(sex, race), nrow = 1)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

# Grid of panels: regions down the rows, sex/race combinations across the
# columns, with a linear fit in each panel.
p <- gss_sm %>%
  ggplot(aes(x = age, y = childs))
p +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(bigregion), cols = vars(sex, race))
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

Organ Donor Data

# Print the organdata tibble to preview its columns before plotting.
organdata
#> # A tibble: 238 × 21
#>    country  year       donors   pop pop_d…¹   gdp gdp_lag health healt…² pubhe…³
#>    <chr>    <date>      <dbl> <int>   <dbl> <int>   <int>  <dbl>   <dbl>   <dbl>
#>  1 Austral… NA          NA    17065   0.220 16774   16591   1300    1224     4.8
#>  2 Austral… 1991-01-01  12.1  17284   0.223 17171   16774   1379    1300     5.4
#>  3 Austral… 1992-01-01  12.4  17495   0.226 17914   17171   1455    1379     5.4
#>  4 Austral… 1993-01-01  12.5  17667   0.228 18883   17914   1540    1455     5.4
#>  5 Austral… 1994-01-01  10.2  17855   0.231 19849   18883   1626    1540     5.4
#>  6 Austral… 1995-01-01  10.2  18072   0.233 21079   19849   1737    1626     5.5
#>  7 Austral… 1996-01-01  10.6  18311   0.237 21923   21079   1846    1737     5.6
#>  8 Austral… 1997-01-01  10.3  18518   0.239 22961   21923   1948    1846     5.7
#>  9 Austral… 1998-01-01  10.5  18711   0.242 24148   22961   2077    1948     5.9
#> 10 Austral… 1999-01-01   8.67 18926   0.244 25445   24148   2231    2077     6.1
#> # … with 228 more rows, 11 more variables: roads <dbl>, cerebvas <int>,
#> #   assault <int>, external <int>, txp_pop <dbl>, world <chr>, opt <chr>,
#> #   consent_law <chr>, consent_practice <chr>, consistent <chr>, ccode <chr>,
#> #   and abbreviated variable names ¹​pop_dens, ²​health_lag, ³​pubhealth
# Donor rate over time, one panel per country. Panels are ordered by the
# negated mean of donors (NAs removed), so the highest averages come first.
ggplot(organdata, aes(x = year, y = donors, group = country)) +
  geom_line() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))
#> Warning: Removed 34 row(s) containing missing values (geom_path).

# Boxplots of donor rates by country (Spain excluded), with countries ordered
# by their mean donors value (NAs removed).
# Facet on consent_law rather than donors: donors is continuous, so faceting
# on it creates one panel per distinct value instead of a meaningful split.
organdata %>% 
  filter(country != "Spain") %>% 
  ggplot(mapping = aes(x = donors, 
                       y = reorder(country, donors, na.rm = TRUE))) + 
  geom_boxplot() + 
  facet_wrap(~ consent_law, scales = "free_y", ncol = 1)
#> Warning: Removed 32 rows containing non-finite values (stat_boxplot).
# NOTE(review): re-knit to refresh the recorded output; the earlier
# position-guide warning presumably came from the per-value panels.

# Using the organ donor data set, we will explore some more of the information it contains.

# Dot plot of donor rates for each country.
# The original aes() call carried a stray unnamed `year` argument that was not
# mapped to any aesthetic; it is removed so the mapping states what is drawn.
organdata %>%
  ggplot(mapping = aes(x = donors, y = country)) +
  geom_point()
#> Warning: Removed 34 rows containing missing values (geom_point).

# This plot makes the information easier to follow, as it lines up the year-to-year organ donor data by country.

# Pooled scatter of donor rate against year (all countries together).
ggplot(organdata, aes(x = year, y = donors)) +
  geom_point()
#> Warning: Removed 34 rows containing missing values (geom_point).

# The same pooled data summarized by a smoother only.
ggplot(organdata, aes(x = year, y = donors)) +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#> Warning: Removed 34 rows containing non-finite values (stat_smooth).

# This plot is not easy to understand, as there are a lot of missing pieces that the reader must fill in.

# Donor rate over time drawn as a line, split into one panel per country.
# Panels are ordered by the negated mean of donors (NAs removed).
ggplot(organdata, aes(x = year, y = donors)) +
  geom_line() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))
#> Warning: Removed 2 row(s) containing missing values (geom_path).

This version of the graph looks a lot smoother and is easier to understand.

# Consent law against consent practice, one panel per country. Panels are
# ordered by the negated mean of donors (NAs removed).
ggplot(organdata, aes(x = consent_law, y = consent_practice)) +
  geom_point() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))

And finally,

# Donor rate against the roads variable, with a linear fit overlaid.
# method = "lm" is a geom_smooth() argument; geom_line() does not accept it
# and only emitted an "unknown parameters" warning, so no fit was drawn.
organdata %>% 
  ggplot(mapping = aes(x = roads, y = donors)) + 
  geom_line() + geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 18 row(s) containing missing values (geom_path).
# NOTE(review): re-knit to capture stat_smooth's own missing-value warning.

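It seems that as the `roads` value increases, the number of donors tends to increase as well. Each country may have its own story, so the pooled trend should be read with caution. Note that, according to the socviz documentation, `roads` records road accident fatalities per 100,000 population rather than the amount of road access, so the more likely reading is that more organs become available for donation where road fatalities are higher.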