library(mosaicData)
## Warning: package 'mosaicData' was built under R version 4.0.3
data(Marriage, package="mosaicData")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# Histogram of `age` in the Marriage data.
# Only `bins` is set: when `binwidth` is also supplied it overrides `bins`,
# which would make the "number of bins" subtitle untrue.
ggplot(Marriage, aes(x = age)) +
  geom_histogram(fill = "cornflowerblue",
                 color = "white",
                 bins = 20) +
  labs(title = "Participants by age",
       subtitle = "number of bins =20",
       x = "age")
# Kernel density estimate of `age` in the Marriage data, with the smoothing
# bandwidth fixed at 1 via `bw`.
ggplot(Marriage, aes(x = age)) +
  geom_density(fill = "indianred3",
               bw = 1) +
  labs(title = "Participants by age")
# Dot plot of `age` in the Marriage data. No `binwidth` is given, so ggplot2
# falls back to its default binning (and says so in a message).
ggplot(Marriage, aes(x = age)) +
  geom_dotplot(fill = "gold",
               color = "black") +
  labs(title = "Participants by age")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Categorical vs. Categorical
# Stacked bar chart: one bar per `class`, with segments coloured by `drv`
# and piled on top of each other (raw counts).
data(mpg, package = "ggplot2")
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "stack")
# Grouped (side-by-side) bar chart of `drv` counts within each `class`.
# `preserve = "single"` keeps every bar the same width even when a class
# does not contain all `drv` values.
data(mpg, package = "ggplot2")
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = position_dodge(preserve = "single"))
# Segmented bar chart: `position = "fill"` rescales every bar to the same
# height, so the segments show the share of each `drv` within a `class`.
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
library(scales)
## Warning: package 'scales' was built under R version 4.0.3
# Segmented bar chart of `drv` share within each `class`, with both factors
# given an explicit level order, readable drive labels, and a percent y axis.
ggplot(mpg,
       aes(x = factor(class,
                      levels = c("2seater", "subcompact",
                                 "compact", "midsize",
                                 "minivan", "suv", "pickup")),
           fill = factor(drv,
                         levels = c("f", "r", "4"),
                         labels = c("front-wheel",
                                    "rear-wheel",
                                    "4-wheel")))) +
  geom_bar(position = "fill") +
  # Step of .2 gives ticks at 0%, 20%, ..., 100%; a step of 2 would leave
  # only the single break at 0.
  scale_y_continuous(breaks = seq(0, 1, .2),
                     labels = percent) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "percent",
       fill = "drive Train",
       x = "Class",
       title = "automobile drive by class") +
  theme_minimal()
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Count cars for each class/drive combination, then express each count as a
# share of its class (`pct`) plus a formatted percent label (`lbl`).
# `.groups = "drop_last"` states explicitly that the summary stays grouped by
# `class`, so `sum(n)` below is the per-class total; it also avoids the
# summarise() regrouping message.
plotdata <- mpg %>%
  group_by(class, drv) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n),
         lbl = scales::percent(pct))
plotdata
## # A tibble: 12 x 5
## # Groups: class [7]
## class drv n pct lbl
## <chr> <chr> <int> <dbl> <chr>
## 1 2seater r 5 1 100%
## 2 compact 4 12 0.255 26%
## 3 compact f 35 0.745 74%
## 4 midsize 4 3 0.0732 7%
## 5 midsize f 38 0.927 93%
## 6 minivan f 11 1 100%
## 7 pickup 4 33 1 100%
## 8 subcompact 4 4 0.114 11%
## 9 subcompact f 22 0.629 63%
## 10 subcompact r 9 0.257 26%
## 11 suv 4 51 0.823 82%
## 12 suv r 11 0.177 18%
# Segmented bar chart drawn from the pre-computed shares in `plotdata`, with
# every segment annotated by its percent label (`lbl`).
ggplot(plotdata,
       aes(x = factor(class,
                      levels = c("2seater", "subcompact",
                                 "compact", "midsize",
                                 "minivan", "suv", "pickup")),
           y = pct,
           fill = factor(drv,
                         levels = c("f", "r", "4"),
                         labels = c("front-wheel",
                                    "rear-wheel",
                                    "4-wheel")))) +
  # geom_col() plots the supplied y values directly
  # (equivalent to geom_bar(stat = "identity")).
  geom_col(position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .2),
                     labels = percent) +
  # Labels use the same "fill" adjustment as the bars, so each one is centred
  # in its segment without depending on the shares summing to exactly 1.
  geom_text(aes(label = lbl),
            size = 3,
            position = position_fill(vjust = 0.5)) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "Percent",
       fill = "Drive Train",
       x = "Class",
       title = "Automobile Drive by Class") +
  theme_minimal()
library(carData)
## Warning: package 'carData' was built under R version 4.0.3
# Scatter plot of `salary` against `yrs.since.phd` from the Salaries data.
# The y axis is formatted as dollars; both axes have fixed limits, so any
# point outside them is not drawn.
data(Salaries, package = "carData")
ggplot(Salaries,
       aes(x = yrs.since.phd,
           y = salary)) +
  geom_point(color = "indianred3",
             size = 2,
             alpha = .8) +
  scale_y_continuous(labels = scales::dollar,
                     limits = c(50000, 250000)) +
  scale_x_continuous(breaks = seq(0, 60, 10),
                     limits = c(0, 60)) +
  # The y-axis title is intentionally blank.
  labs(x = "years since phd",
       y = "",
       title = "experience vs. salary",
       subtitle = "9-month salary for 2008")