# Import data ----
# Attach the packages used by this script, read the salaries workbook into
# `data`, and print a per-column summary of it.
library(tidyverse)
library(readxl)
data <- read_excel(path = "../00_data/Salaries.xlsx")
skimr::skim(data)
| Name | data |
| Number of rows | 397 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| character | 3 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| rank | 0 | 1 | 4 | 9 | 0 | 3 | 0 |
| discipline | 0 | 1 | 1 | 1 | 0 | 2 | 0 |
| sex | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| yrs.since.phd | 0 | 1 | 22.31 | 12.89 | 1 | 12 | 21 | 32 | 56 | ▇▇▆▅▁ |
| yrs.service | 0 | 1 | 17.61 | 13.01 | 0 | 7 | 16 | 27 | 60 | ▇▅▃▂▁ |
| salary | 0 | 1 | 113706.46 | 30289.04 | 57800 | 91000 | 107300 | 134185 | 231545 | ▅▇▅▂▁ |
library(skimr)
# Bar chart counting the rows of `data` at each value of `rank`.
ggplot(data, aes(x = rank)) +
  geom_bar()
# Histogram of `carat` in `diamonds`, using bins 0.5 wide.
# The x aesthetic is mapped with `=`: writing `aes(x + carat)` would instead
# pass the expression `x + carat` as the x mapping, so the histogram would bin
# that sum rather than `carat` itself.
diamonds %>%
  ggplot(mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.5)
# Histogram of `carat` (bin width 0.5) using only rows where carat is below 3.
ggplot(filter(diamonds, carat < 3), aes(x = carat)) +
  geom_histogram(binwidth = 0.5)
# Frequency polygons of `carat`, with one coloured line per level of `cut`.
# No bin width is given, so the default binning is used.
ggplot(diamonds, aes(x = carat, color = cut)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.