library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gapminder)
# Write two versions of the GDP-vs-life-expectancy scatterplot to PNG files
# through the base graphics device. Every png() is paired with its own
# dev.off() so each file is finalized before the next device is opened.
# NOTE(review): the file names say "diamonds" but the data plotted is
# gapminder; the names are kept so existing outputs are not renamed.

# Set the file name for the first graph
png(filename = "diamonds_loess.png")
# print() is explicit so the plot is drawn on the device even when this
# script is run via source(), where top-level values are not auto-printed.
print(
  ggplot(data = gapminder,
         mapping = aes(x = gdpPercap, y = lifeExp)) +
    geom_point(mapping = aes(color = continent)) +
    geom_smooth(method = "loess") +
    scale_x_log10()
)
## `geom_smooth()` using formula = 'y ~ x'
# Close the first device; without this the first file is left open
dev.off()
# Set the file name for the second graph
png(filename = "diamonds_lm.png")
print(
  ggplot(data = gapminder,
         mapping = aes(x = gdpPercap, y = lifeExp)) +
    geom_point(mapping = aes(color = continent)) +
    geom_smooth(method = "lm") +
    scale_x_log10()
)
## `geom_smooth()` using formula = 'y ~ x'
# Save the file
dev.off()
## png
## 2
# Build the loess-smoothed scatterplot as an object so it can be handed to
# ggsave() instead of being drawn on an explicitly opened device.
# NOTE(review): the variable name `plot` shares its name with base::plot();
# kept unchanged in case later code refers to it.
plot <- ggplot(data = gapminder,
               mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  geom_smooth(method = "loess") +
  scale_x_log10()
# Save the plot as a png. The argument is spelled out as `filename` rather
# than relying on partial matching of `file`.
ggsave(filename = "diamonds_ggs.png", plot = plot, width = 10, height = 5)
## `geom_smooth()` using formula = 'y ~ x'
# Map color to country. Every country gets its own legend entry, so the
# figure is huge; save it on a large canvas instead of printing it.
p <- ggplot(data = gapminder,
            mapping = aes(x = year, y = gdpPercap)) +
  geom_point(mapping = aes(color = country))
# Save the plot as a png
ggsave(filename = "YearVsgdp_country.png", plot = p, width = 20, height = 20)
# NOTE(review): this grouped-points plot is overwritten by the next
# assignment before it is ever printed or saved.
p <- ggplot(data = gapminder,
            mapping = aes(x = year, y = gdpPercap)) +
  geom_point(mapping = aes(group = country))
# Draw one line per country (group) and color each line by country
p <- ggplot(data = gapminder,
            mapping = aes(x = year, y = gdpPercap)) +
  geom_line(mapping = aes(group = country, color = country))
# Save the plot as a png
ggsave(
  filename = "YearVsgdp_Groupedcountry.png",
  plot = p,
  width = 20,
  height = 20
)
# Small multiples: one panel per continent, one GDP-per-capita line per
# country within each panel.
facet <- ggplot(gapminder, aes(x = year, y = gdpPercap)) +
  geom_line(aes(group = country)) +
  facet_wrap(~continent)
facet
# Styled version of the faceted plot: colored country lines, a loess trend
# line per panel (no confidence band), a log10 y axis labelled in dollars,
# and all five panels laid out in a single row.
facet <- ggplot(data = gapminder,
                mapping = aes(x = year, y = gdpPercap)) +
  geom_line(color = "#f857a6", mapping = aes(group = country)) +
  # `linewidth` replaces `size` for lines (`size` deprecated in ggplot2 3.4.0)
  geom_smooth(
    linewidth = 1.1,
    method = "loess",
    color = "lightblue",
    se = FALSE
  ) +
  scale_y_log10(labels = scales::dollar) +
  facet_wrap(~continent, ncol = 5) +
  labs(x = "Year",
       y = "GDP per capita",
       title = "GDP per Capita on Five Continents",
       subtitle = "Source: Gapminder")
facet
## `geom_smooth()` using formula = 'y ~ x'
# No width/height given, so ggsave() uses the current device size
ggsave(filename = "yearVSGDP_facetNcol.png", plot = facet)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
# Same plot with different colors and the x-axis labels rotated 90 degrees
# so the years do not overlap in the narrow panels.
facet <- ggplot(data = gapminder,
                mapping = aes(x = year, y = gdpPercap)) +
  geom_line(color = "#f64f59", mapping = aes(group = country)) +
  geom_smooth(
    linewidth = 1.1,
    method = "loess",
    color = "#12c2e9",
    se = FALSE
  ) +
  scale_y_log10(labels = scales::dollar) +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~continent, ncol = 5) +
  labs(x = "Year",
       y = "GDP per capita",
       title = "GDP per Capita on Five Continents",
       subtitle = "Source: Gapminder")
facet
## `geom_smooth()` using formula = 'y ~ x'
ggsave(filename = "yearVSGDP_facetNcol_x90.png", plot = facet)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
Facets can also be laid out as a grid to compare the data along more than one variable at once. This removes the limitation of a single row with many columns. To prepare the examples, we switch to the gss_sm data set.
# Take a peek at the new data set
# One-time install, if the socviz package is not available yet:
# devtools::install_github("kjhealy/socviz")
library(socviz)
# Print a compact, column-by-column overview of gss_sm
# NOTE(review): gss_sm is presumably provided by socviz — confirm
glimpse(gss_sm)
## Rows: 2,867
## Columns: 32
## $ year <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016…
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ ballot <labelled> 1, 2, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 3, 2, 3, 3, 2,…
## $ age <dbl> 47, 61, 72, 43, 55, 53, 50, 23, 45, 71, 33, 86, 32, 60, 76…
## $ childs <dbl> 3, 0, 2, 4, 2, 2, 2, 3, 3, 4, 5, 4, 3, 5, 7, 2, 6, 5, 0, 2…
## $ sibs <labelled> 2, 3, 3, 3, 2, 2, 2, 6, 5, 1, 4, 4, 3, 6, 0, 1, 3, 8,…
## $ degree <fct> Bachelor, High School, Bachelor, High School, Graduate, Ju…
## $ race <fct> White, White, White, White, White, White, White, Other, Bl…
## $ sex <fct> Male, Male, Male, Female, Female, Female, Male, Female, Ma…
## $ region <fct> New England, New England, New England, New England, New En…
## $ income16 <fct> $170000 or over, $50000 to 59999, $75000 to $89999, $17000…
## $ relig <fct> None, None, Catholic, Catholic, None, None, None, Catholic…
## $ marital <fct> Married, Never Married, Married, Married, Married, Married…
## $ padeg <fct> Graduate, Lt High School, High School, NA, Bachelor, NA, H…
## $ madeg <fct> High School, High School, Lt High School, High School, Hig…
## $ partyid <fct> "Independent", "Ind,near Dem", "Not Str Republican", "Not …
## $ polviews <fct> Moderate, Liberal, Conservative, Moderate, Slightly Libera…
## $ happy <fct> Pretty Happy, Pretty Happy, Very Happy, Pretty Happy, Very…
## $ partners <fct> NA, "1 Partner", "1 Partner", NA, "1 Partner", "1 Partner"…
## $ grass <fct> NA, Legal, Not Legal, NA, Legal, Legal, NA, Not Legal, NA,…
## $ zodiac <fct> Aquarius, Scorpio, Pisces, Cancer, Scorpio, Scorpio, Capri…
## $ pres12 <labelled> 3, 1, 2, 2, 1, 1, NA, NA, NA, 2, NA, NA, 1, 1, 2, 1, …
## $ wtssall <dbl> 0.9569935, 0.4784968, 0.9569935, 1.9139870, 1.4354903, 0.9…
## $ income_rc <fct> Gt $170000, Gt $50000, Gt $75000, Gt $170000, Gt $170000, …
## $ agegrp <fct> Age 45-55, Age 55-65, Age 65+, Age 35-45, Age 45-55, Age 4…
## $ ageq <fct> Age 34-49, Age 49-62, Age 62+, Age 34-49, Age 49-62, Age 4…
## $ siblings <fct> 2, 3, 3, 3, 2, 2, 2, 6+, 5, 1, 4, 4, 3, 6+, 0, 1, 3, 6+, 2…
## $ kids <fct> 3, 0, 2, 4+, 2, 2, 2, 3, 3, 4+, 4+, 4+, 3, 4+, 4+, 2, 4+, …
## $ religion <fct> None, None, Catholic, Catholic, None, None, None, Catholic…
## $ bigregion <fct> Northeast, Northeast, Northeast, Northeast, Northeast, Nor…
## $ partners_rc <fct> NA, 1, 1, NA, 1, 1, NA, 1, NA, 3, 1, NA, 1, NA, 0, 1, 0, N…
## $ obama <dbl> 0, 1, 0, 0, 1, 1, NA, NA, NA, 0, NA, NA, 1, 1, 0, 1, 0, 1,…
# Number of children against age, as a grid of panels: one row per sex and
# one column per race. Points are semi-transparent to reduce overplotting,
# and geom_smooth() picks its default smoother.
a <- ggplot(gss_sm, aes(x = age, y = childs)) +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_grid(sex ~ race)
a
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 18 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 18 rows containing missing values (`geom_point()`).
# Use geom_bar() to produce a bar chart of the number of rows per region
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion)) +
  geom_bar()
p
# Map y to the computed `prop` statistic instead of the count.
# after_stat() replaces the dot-dot notation (`..prop..`), which was
# deprecated in ggplot2 3.4.0. Without an explicit group, the proportion is
# computed within each bar, so this version is shown only for comparison.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion)) +
  geom_bar(mapping = aes(y = after_stat(prop)))
p
# group = 1 puts all rows in one group, so each bar shows its share of the
# whole data set.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion)) +
  geom_bar(mapping = aes(y = after_stat(prop), group = 1))
p
# Frequency of each religion category
table(gss_sm$religion)
##
## Protestant Catholic Jewish None Other
## 1371 649 51 619 159
# Color coded outline
p <- ggplot(data = gss_sm,
            mapping = aes(x = religion, color = religion)) +
  geom_bar()
p
# Filled in colored bars. The bars must be mapped to `fill` (not `color`)
# to be filled; the x axis already labels each bar, so the redundant fill
# legend is turned off. guides() takes "none" rather than FALSE.
p <- ggplot(data = gss_sm,
            mapping = aes(x = religion, fill = religion)) +
  geom_bar() +
  guides(fill = "none")
p
# Color coded outline
p <- ggplot(data = gss_sm,
            mapping = aes(x = polviews, color = polviews)) +
  geom_bar()
p
# Filled in colored bars. `fill` (not `color`) is the aesthetic that fills
# the bars, and it is the legend that guides(fill = "none") turns off.
# The x-axis labels are rotated 90 degrees so they do not overlap.
p <- ggplot(data = gss_sm,
            mapping = aes(x = polviews, fill = polviews)) +
  geom_bar() +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90))
p
# Religion frequencies by region, drawn with three bar positions.

# Default position: religions stacked on top of each other, bar height is
# the region's total count
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion)) +
  geom_bar()
p
# position = "fill": every bar is stretched to the same height, showing the
# relative share of each religion within a region
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion)) +
  geom_bar(position = "fill")
p
# position = "dodge": religions side by side within each region
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion)) +
  geom_bar(position = "dodge")
p
# Dodged bars with proportions on the y axis. after_stat(prop) replaces the
# deprecated `..prop..` notation (ggplot2 3.4.0).
# First attempt: no explicit group is set.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion,
                          fill = religion)) +
  geom_bar(position = "dodge",
           mapping = aes(y = after_stat(prop)))
p
# Second attempt: group by religion, so proportions are computed within
# each religion across the regions.
p <- ggplot(data = gss_sm,
            mapping = aes(x = bigregion,
                          fill = religion)) +
  geom_bar(position = "dodge",
           mapping = aes(y = after_stat(prop),
                         group = religion))
p
First, we switch to the midwest data set.
# Preview the first six rows of midwest
head(midwest)
## # A tibble: 6 × 28
## PID county state area poptotal popdensity popwhite popblack popamerindian
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98
## 2 562 ALEXAND… IL 0.014 10626 759 7054 3496 19
## 3 563 BOND IL 0.022 14991 681. 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812. 29344 127 46
## 5 565 BROWN IL 0.018 5836 324. 5264 547 14
## 6 566 BUREAU IL 0.05 35688 714. 35157 50 65
## # ℹ 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## # percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## # popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## # poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## # percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## # percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Histogram of county area with the default binning chosen by
# geom_histogram()
p <- ggplot(midwest, aes(x = area)) +
  geom_histogram()
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same histogram with the number of bins fixed at 10
p <- ggplot(midwest, aes(x = area)) +
  geom_histogram(bins = 10)
p
# State abbreviations to keep
oh_wi <- c("OH", "WI")
# Keep all columns, but only the midwest rows whose state is OH or WI
midwest_ohwi <- filter(midwest, state %in% oh_wi)
head(midwest_ohwi)
## # A tibble: 6 × 28
## PID county state area poptotal popdensity popwhite popblack popamerindian
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
## 1 2009 ADAMS OH 0.035 25371 725. 25212 47 67
## 2 2010 ALLEN OH 0.024 109755 4573. 96177 12313 202
## 3 2011 ASHLAND OH 0.025 47507 1900. 46686 460 49
## 4 2012 ASHTABU… OH 0.041 99821 2435. 95465 3138 196
## 5 2013 ATHENS OH 0.03 59549 1985. 56163 1678 167
## 6 2014 AUGLAIZE OH 0.024 44585 1858. 44225 66 50
## # ℹ 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## # percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## # popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## # poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## # percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## # percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Histogram of county area for the OH/WI subset, filled by state; the bars
# are semi-transparent so overlapping states stay visible
p <- ggplot(midwest_ohwi, aes(x = area, fill = state)) +
  geom_histogram(alpha = 0.4, bins = 20)
p
# Same comparison as smoothed density curves instead of binned counts
p <- ggplot(midwest_ohwi, aes(x = area, fill = state)) +
  geom_density(alpha = 0.4)
p
ggplot() automatically does data transformation work for you. Sometimes you want to disable it because the data is already transformed. To do so, set stat = "identity" in the geom_ call; the geom then plots the values exactly as they appear in the data.
# titanic dataset contains pre-transformed data: the percent column is
# already computed for each fate/sex combination
# NOTE(review): presumably provided by the socviz package loaded earlier — confirm
titanic
## fate sex n percent
## 1 perished male 1364 62.0
## 2 perished female 126 5.7
## 3 survived male 367 16.7
## 4 survived female 344 15.6
# Create a dodged (side-by-side) bar chart of fate by sex.
# stat = "identity" makes geom_bar() use the percent column as the bar
# height instead of counting rows. The legend is placed above the plot.
p <- ggplot(titanic, aes(x = fate, y = percent, fill = sex)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(legend.position = "top")
p