Quantitative: Histogram

library(mosaicData)

## Warning: package 'mosaicData' was built under R version 4.0.3

data(Marriage, package="mosaicData")
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.3

# Histogram of participant ages from the Marriage data set.
# Only `bins` is supplied: ggplot2 lets `binwidth` override `bins`, so passing
# both silently ignored the 20-bin request that the subtitle advertises.
ggplot(Marriage, aes(x = age)) +
  geom_histogram(fill = "cornflowerblue",
                 color = "white",
                 bins = 20) +
  labs(title = "Participants by age",
       subtitle = "number of bins =20",
       x = "age")

# Kernel density plot of participant ages, drawn with a smoothing
# bandwidth (`bw`) of 1.
ggplot(Marriage, aes(x = age)) +
  geom_density(fill = "indianred3",
               bw = 1) +
  # Title typo fixed ("Particitae") so it matches the histogram's title.
  labs(title = "Participants by age")

# Dot plot of participant ages: gold dots with a black outline.
# No `binwidth` is given, so ggplot2 falls back to its default binning
# (see the message printed after this chunk).
ggplot(Marriage, aes(x = age)) +
  geom_dotplot(fill = "gold",
               color = "black") +
  # Title typo fixed ("Particitae") so it matches the histogram's title.
  labs(title = "Participants by age")

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Categorical vs. Categorical

# Stacked bar chart: one bar per car class, with the count in each bar
# split (and coloured) by drive type.
data("mpg", package = "ggplot2")
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "stack")

# Grouped bar chart: within each car class, the drive types are drawn as
# separate bars placed side by side (dodged) instead of stacked.
data("mpg", package = "ggplot2")
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = position_dodge(preserve = "single"))

# Segmented bar chart: position = "fill" scales every class bar to the same
# height, so the y axis shows the share of each drive type, not a count.
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")

library(scales)

## Warning: package 'scales' was built under R version 4.0.3

# Segmented bar chart of drive type within each car class, with the classes
# in a fixed order, descriptive drive-type labels, and a percent y axis.
ggplot(mpg,
       aes(x = factor(class,
                      levels = c("2seater", "subcompact",
                                 "compact", "midsize",
                                 "minivan", "suv", "pickup")),
           fill = factor(drv,
                         levels = c("f", "r", "4"),
                         labels = c("front-wheel",
                                    "rear-wheel",
                                    "4-wheel")))) +
  geom_bar(position = "fill") +
  # Breaks every 0.2 (0%, 20%, ..., 100%). The previous seq(0, 1, 2) produced
  # a single break at 0. `labels` is spelled out in full instead of relying
  # on partial matching of `label`.
  scale_y_continuous(breaks = seq(0, 1, 0.2),
                     labels = percent) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "percent",
       fill = "drive Train",
       x = "Class",
       title = "automobile drive by class") +
  theme_minimal()

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Summary table used for the labelled percent chart.
#   n   - number of cars for each class / drive-type combination
#   pct - that count as a share of its class total
#   lbl - pct formatted as a percent string for use as a bar label
plotdata <- mpg %>%
  group_by(class, drv) %>%
  # Drop only the last grouping level (drv) so the result stays grouped by
  # class; sum(n) below is therefore the per-class total. Stating .groups
  # explicitly keeps the same result without the regrouping message.
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n),
         lbl = scales::percent(pct))

plotdata

## # A tibble: 12 x 5
## # Groups:   class [7]
##    class      drv       n    pct lbl  
##    <chr>      <chr> <int>  <dbl> <chr>
##  1 2seater    r         5 1      100% 
##  2 compact    4        12 0.255  26%  
##  3 compact    f        35 0.745  74%  
##  4 midsize    4         3 0.0732 7%   
##  5 midsize    f        38 0.927  93%  
##  6 minivan    f        11 1      100% 
##  7 pickup     4        33 1      100% 
##  8 subcompact 4         4 0.114  11%  
##  9 subcompact f        22 0.629  63%  
## 10 subcompact r         9 0.257  26%  
## 11 suv        4        51 0.823  82%  
## 12 suv        r        11 0.177  18%

# Segmented bar chart built from the pre-computed `plotdata` table, with each
# segment annotated by its percent label (`lbl`).
ggplot(plotdata,
       aes(x = factor(class,
                      levels = c("2seater", "subcompact",
                                 "compact", "midsize",
                                 "minivan", "suv", "pickup")),
           y = pct,
           fill = factor(drv,
                         levels = c("f", "r", "4"),
                         labels = c("front-wheel",
                                    "rear-wheel",
                                    "4-wheel")))) +
  # stat = "identity": bar heights come straight from the `pct` column.
  geom_bar(stat = "identity",
           position = "fill") +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_y_continuous(breaks = seq(0, 1, 0.2),
                     labels = percent) +
  # Centre each label in its segment (vjust = 0.5). The text uses the same
  # "fill" position as the bars, so labels stay aligned with the segments
  # without depending on `pct` summing to exactly 1 within every class.
  geom_text(aes(label = lbl),
            size = 3,
            position = position_fill(vjust = 0.5)) +
  scale_fill_brewer(palette = "Set2") +
  labs(y = "Percent",
       fill = "Drive Train",
       x = "Class",
       title = "Automobile Drive by Class") +
  theme_minimal()

library(carData)

## Warning: package 'carData' was built under R version 4.0.3

# Scatter plot of salary against years since PhD from the Salaries data set.
data(Salaries, package = "carData")
ggplot(Salaries,
       aes(x = yrs.since.phd,
           y = salary)) +
  # Slightly transparent points so overlapping observations remain visible.
  geom_point(color = "indianred3",
             size = 2,
             alpha = .8) +
  # Dollar-formatted y axis. `labels` is spelled out in full instead of
  # relying on partial matching of `label`.
  scale_y_continuous(labels = scales::dollar,
                     limits = c(50000, 250000)) +
  scale_x_continuous(breaks = seq(0, 60, 10),
                     limits = c(0, 60)) +
  # The y-axis title is intentionally blank.
  labs(x = "years since phd",
       y = "",
       title = "experience vs. salary",
       subtitle = "9-month salary for 2008")

graph-practice

tl y

1/11/2021

Quantitative: Histogram