#MoreGGplot assignment
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
#> ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
#> ✔ tibble 3.1.8 ✔ dplyr 1.0.10
#> ✔ tidyr 1.2.1 ✔ stringr 1.4.1
#> ✔ readr 2.1.3 ✔ forcats 0.5.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
library(gapminder)
library(socviz)

# First plot asked of us in the example: a bar chart over `bigregion`
# from the `gss_sm` data, using geom_bar()'s default statistic.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion))
p + geom_bar()
# Second plot from the example: the bar height is mapped to the computed
# proportion. after_stat(prop) is the current spelling of `..prop..`, which
# newer ggplot2 releases deprecate.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion))
p + geom_bar(mapping = aes(y = after_stat(prop)))
# Third and last plot of the set: the same proportion mapping, with every row
# placed in a single group (`group = 1`). after_stat(prop) is the current
# spelling of `..prop..`, which newer ggplot2 releases deprecate.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion))
p + geom_bar(mapping = aes(y = after_stat(prop), group = 1))
# We then move on to make histogram and density plots:
# Histogram of `area` from the `midwest` data, leaving the bin count at its
# default.
p <- ggplot(midwest, aes(x = area))
p + geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with an explicit bin count instead of the default 30.
p <- ggplot(data = midwest,
            mapping = aes(x = area))
p + geom_histogram(bins = 10)

# State codes used to subset `midwest` for the next comparison.
oh_wi <- c("OH", "WI")
# Histogram of `percollege` for the rows whose state is in `oh_wi`,
# filled by state and semi-transparent so the two distributions overlap.
p <- ggplot(data = subset(midwest, subset = state %in% oh_wi),
            mapping = aes(x = percollege, fill = state))
p + geom_histogram(alpha = 0.4, bins = 20)

# Density estimate of `area` across the whole `midwest` data.
p <- ggplot(data = midwest,
            mapping = aes(x = area))
p + geom_density()

# One density curve per state, with matching fill and outline colors.
p <- ggplot(data = midwest,
            mapping = aes(x = area, fill = state, color = state))
p + geom_density(alpha = 0.3)

# Print the pre-summarised `titanic` table.
titanic
#> fate sex n percent
#> 1 perished male 1364 62.0
#> 2 perished female 126 5.7
#> 3 survived male 367 16.7
#> 4 survived female 344 15.6

# `titanic` already holds one row per fate/sex combination with its
# percentage, so the bars use the values as-is (stat = "identity") and are
# placed side by side per fate.
p <- ggplot(data = titanic,
            mapping = aes(x = fate, y = percent, fill = sex))
p + geom_bar(position = "dodge", stat = "identity") +
  theme(legend.position = "top")

# Print the `oecd_sum` table used for the next chart.
oecd_sum
#> # A tibble: 57 × 5
#> # Groups: year [57]
#> year other usa diff hi_lo
#> <int> <dbl> <dbl> <dbl> <chr>
#> 1 1960 68.6 69.9 1.30 Below
#> 2 1961 69.2 70.4 1.20 Below
#> 3 1962 68.9 70.2 1.30 Below
#> 4 1963 69.1 70 0.900 Below
#> 5 1964 69.5 70.3 0.800 Below
#> 6 1965 69.6 70.3 0.700 Below
#> 7 1966 69.9 70.3 0.400 Below
#> 8 1967 70.1 70.7 0.600 Below
#> 9 1968 70.1 70.4 0.300 Below
#> 10 1969 70.1 70.6 0.5 Below
#> # … with 47 more rows

# Column chart of the yearly `diff` value, filled by `hi_lo`.
# The fill legend is suppressed with guides(fill = "none"); the earlier
# `fill = FALSE` form is deprecated and produced a warning.
p <- ggplot(data = oecd_sum,
            mapping = aes(x = year, y = diff, fill = hi_lo))
p + geom_col() +
  guides(fill = "none") +
  labs(x = NULL,
       y = "Difference in Years",
       title = "The US Life Expectancy Gap",
       subtitle = "Difference between US and OECD average life expectancies, 1960-2015",
       caption = paste0("Data: OECD. After a chart by Christopher Ingraham,\n",
                        "Washington Post, December 27th 2017."))
#> Warning: Removed 1 rows containing missing values (position_stack).

# Frequency table of `religion` in `gss_sm`.
with(gss_sm, table(religion))
#> religion
#> Protestant Catholic Jewish None Other
#> 1371 649 51 619 159

# Bar chart of religion with religion mapped to the `color` aesthetic.
p <- ggplot(data = gss_sm,
            mapping = aes(x = religion, color = religion))
p + geom_bar()

# Same chart with religion mapped to `fill`; the fill legend is suppressed
# with guides(fill = "none") (the `fill = FALSE` form is deprecated).
p <- ggplot(data = gss_sm,
            mapping = aes(x = religion, fill = religion))
p + geom_bar() + guides(fill = "none")

# Region on the x axis, religion as fill. The same base plot is reused for
# the three variants below.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion,
                          fill = religion))

# Variant 1: default bar positioning.
p + geom_bar()

# Variant 2: dodged bars of the computed proportion, no explicit group.
# after_stat(prop) is the current spelling of `..prop..`.
p + geom_bar(position = "dodge",
             mapping = aes(y = after_stat(prop)))

# Variant 3: dodged proportions, grouped by religion.
p + geom_bar(position = "dodge",
             mapping = aes(y = after_stat(prop),
                           group = religion))

# Count respondents per region/religion pair, then turn the counts into
# shares. After tally() the table is still grouped by `bigregion` (see the
# `Groups: bigregion [4]` line in the printed output), so sum(n) is the
# regional total and `freq`/`pct` are shares within each region.
rel_by_region <- gss_sm %>%
  group_by(bigregion, religion) %>%
  tally() %>%
  mutate(freq = n / sum(n),
         pct = round((freq * 100), 1))
rel_by_region
#> # A tibble: 24 × 5
#> # Groups: bigregion [4]
#> bigregion religion n freq pct
#> <fct> <fct> <int> <dbl> <dbl>
#> 1 Northeast Protestant 158 0.324 32.4
#> 2 Northeast Catholic 162 0.332 33.2
#> 3 Northeast Jewish 27 0.0553 5.5
#> 4 Northeast None 112 0.230 23
#> 5 Northeast Other 28 0.0574 5.7
#> 6 Northeast <NA> 1 0.00205 0.2
#> 7 Midwest Protestant 325 0.468 46.8
#> 8 Midwest Catholic 172 0.247 24.7
#> 9 Midwest Jewish 3 0.00432 0.4
#> 10 Midwest None 157 0.226 22.6
#> # … with 14 more rows

# Horizontal columns of `pct` per religion, one panel per region in a single
# row. The fill legend is suppressed with guides(fill = "none") (the
# `fill = FALSE` form is deprecated and produced a warning).
p <- ggplot(data = rel_by_region,
            mapping = aes(x = pct, y = religion, fill = religion))
p + geom_col() +
  labs(x = "Percent", y = NULL) +
  guides(fill = "none") +
  facet_wrap(~ bigregion, nrow = 1)

# n(), tally(), and count(): three ways of getting the same
# region-by-religion counts. First, summarize() with n().
gss_sm %>%
  group_by(bigregion, religion) %>%
  summarize(n = n())
#> `summarise()` has grouped output by 'bigregion'. You can override using the
#> `.groups` argument.
#> # A tibble: 24 × 3
#> # Groups: bigregion [4]
#> bigregion religion n
#> <fct> <fct> <int>
#> 1 Northeast Protestant 158
#> 2 Northeast Catholic 162
#> 3 Northeast Jewish 27
#> 4 Northeast None 112
#> 5 Northeast Other 28
#> 6 Northeast <NA> 1
#> 7 Midwest Protestant 325
#> 8 Midwest Catholic 172
#> 9 Midwest Jewish 3
#> 10 Midwest None 157
#> # … with 14 more rows

# Second way: group, then tally().
gss_sm %>%
  group_by(bigregion, religion) %>%
  tally()
#> # A tibble: 24 × 3
#> # Groups: bigregion [4]
#> bigregion religion n
#> <fct> <fct> <int>
#> 1 Northeast Protestant 158
#> 2 Northeast Catholic 162
#> 3 Northeast Jewish 27
#> 4 Northeast None 112
#> 5 Northeast Other 28
#> 6 Northeast <NA> 1
#> 7 Midwest Protestant 325
#> 8 Midwest Catholic 172
#> 9 Midwest Jewish 3
#> 10 Midwest None 157
#> # … with 14 more rows

# Third way: count() takes the grouping columns directly.
gss_sm %>%
  count(bigregion, religion)
#> # A tibble: 24 × 3
#> bigregion religion n
#> <fct> <fct> <int>
#> 1 Northeast Protestant 158
#> 2 Northeast Catholic 162
#> 3 Northeast Jewish 27
#> 4 Northeast None 112
#> 5 Northeast Other 28
#> 6 Northeast <NA> 1
#> 7 Midwest Protestant 325
#> 8 Midwest Catholic 172
#> 9 Midwest Jewish 3
#> 10 Midwest None 157
#> # … with 14 more rows

# Number of children against age: semi-transparent points plus a smoother,
# one panel per race.
p <- ggplot(data = gss_sm, mapping = aes(x = age, y = childs))
p + geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_wrap(~ race)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

# Same plot, with one panel per sex/race combination laid out in a single row.
p <- ggplot(data = gss_sm,
            mapping = aes(x = age, y = childs))
p + geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_wrap(~ sex + race, nrow = 1)
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

# Grid of panels: regions as rows, sex/race combinations as columns, with a
# linear fit in place of the default smoother.
p <- ggplot(data = gss_sm, mapping = aes(x = age, y = childs))
p + geom_point(alpha = 0.2) +
  geom_smooth(method = "lm") +
  facet_grid(bigregion ~ sex + race)
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Removed 18 rows containing non-finite values (stat_smooth).
#> Warning: Removed 18 rows containing missing values (geom_point).

# Print the `organdata` table used in the remaining plots.
organdata
#> # A tibble: 238 × 21
#> country year donors pop pop_d…¹ gdp gdp_lag health healt…² pubhe…³
#> <chr> <date> <dbl> <int> <dbl> <int> <int> <dbl> <dbl> <dbl>
#> 1 Austral… NA NA 17065 0.220 16774 16591 1300 1224 4.8
#> 2 Austral… 1991-01-01 12.1 17284 0.223 17171 16774 1379 1300 5.4
#> 3 Austral… 1992-01-01 12.4 17495 0.226 17914 17171 1455 1379 5.4
#> 4 Austral… 1993-01-01 12.5 17667 0.228 18883 17914 1540 1455 5.4
#> 5 Austral… 1994-01-01 10.2 17855 0.231 19849 18883 1626 1540 5.4
#> 6 Austral… 1995-01-01 10.2 18072 0.233 21079 19849 1737 1626 5.5
#> 7 Austral… 1996-01-01 10.6 18311 0.237 21923 21079 1846 1737 5.6
#> 8 Austral… 1997-01-01 10.3 18518 0.239 22961 21923 1948 1846 5.7
#> 9 Austral… 1998-01-01 10.5 18711 0.242 24148 22961 2077 1948 5.9
#> 10 Austral… 1999-01-01 8.67 18926 0.244 25445 24148 2231 2077 6.1
#> # … with 228 more rows, 11 more variables: roads <dbl>, cerebvas <int>,
#> # assault <int>, external <int>, txp_pop <dbl>, world <chr>, opt <chr>,
#> # consent_law <chr>, consent_practice <chr>, consistent <chr>, ccode <chr>,
#> # and abbreviated variable names ¹pop_dens, ²health_lag, ³pubhealth

# Donors over time, one line and one panel per country. Panels are ordered by
# decreasing mean of `donors` (NAs ignored), hence the negated variable.
organdata %>%
  ggplot(mapping = aes(x = year, y = donors, group = country)) +
  geom_line() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))
#> Warning: Removed 34 row(s) containing missing values (geom_path).

# Boxplots of `donors` per country with Spain excluded; countries are ordered
# on the y axis by `donors` (NAs ignored).
# NOTE(review): the facets are built from `donors`, which is a numeric column,
# so every distinct value gets its own panel. A categorical column such as
# `consent_law` was probably intended - confirm before changing.
organdata %>%
  filter(country != "Spain") %>%
  ggplot(mapping = aes(x = donors,
                       y = reorder(country, donors, na.rm = TRUE))) +
  geom_boxplot() +
  facet_wrap(~ donors, scales = "free_y", ncol = 1)
#> Warning: Removed 32 rows containing non-finite values (stat_boxplot).
#> Warning: Position guide is perpendicular to the intended axis. Did you mean to
#> specify a different guide `position`?

# Using the organ donor data set, we will show some more information about it!
# The stray positional `year` that sat between `x` and `y` was removed: it was
# not assigned to any aesthetic name, so the plot is still donors by country.
organdata %>%
  ggplot(mapping = aes(x = donors, y = country)) +
  geom_point()
#> Warning: Removed 34 rows containing missing values (geom_point).
# This makes the information easier to follow, as it shows a direct link from year to year in the organ donor data by country.
# Scatterplot of donors against year for all rows of `organdata`.
ggplot(organdata, aes(x = year, y = donors)) +
  geom_point()
#> Warning: Removed 34 rows containing missing values (geom_point).

# Same mapping, drawn as a single smoothed trend instead of points.
organdata %>%
  ggplot(mapping = aes(x = year, y = donors)) +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#> Warning: Removed 34 rows containing non-finite values (stat_smooth).
# This plot is not easy to understand, as there are a lot of missing pieces that the reader has to fill in.
# Donors over time as a line, one panel per country; panels are ordered by
# decreasing mean of `donors` (NAs ignored).
ggplot(organdata, aes(x = year, y = donors)) +
  geom_line() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))
#> Warning: Removed 2 row(s) containing missing values (geom_path).
# This version of the graphs looks a lot smoother and is easier to
# understand.
# Consent law against consent practice, one panel per country; panels are
# ordered by decreasing mean of `donors` (NAs ignored).
organdata %>%
  ggplot(mapping = aes(x = consent_law, y = consent_practice)) +
  geom_point() +
  facet_wrap(~ reorder(country, -donors, mean, na.rm = TRUE))

# And finally,
# Donors against `roads`, drawn as a line with a fitted linear trend on top.
organdata %>%
  ggplot(mapping = aes(x = roads, y = donors)) +
  geom_line() +
  # geom_line() has no `method` parameter (it was ignored with an
  # "unknown parameters" warning); geom_smooth() is the layer that fits
  # the "lm" trend.
  geom_smooth(method = "lm")

# It seems as if donors tend to increase as `roads` increases. Each country
# may have its own story, but this may show that more people are available to
# donate organs when there is more road access.
# NOTE(review): confirm what `roads` measures with ?organdata before keeping
# this interpretation - it may be a road-fatality rate rather than the amount
# of road.