Apply6.knit

# Import data

library(tidyverse)
library(readxl)

# Read the salaries workbook from the sibling 00_data directory.
data <- read_excel(path = "../00_data/Salaries.xlsx")

# Print a per-column summary (completeness, ranges, small histograms).
skimr::skim(data)

Variable type: character

skim_variable	complete_rate	min	max	n_unique
rank	1	4	9	3
discipline	1	1	1	2
sex	1	4	6	2

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
yrs.since.phd	1	22.31	12.89	1	12	21	32	56	▇▇▆▅▁
yrs.service	1	17.61	13.01	0	7	16	27	60	▇▅▃▂▁
salary	1	113706.46	30289.04	57800	91000	107300	134185	231545	▅▇▅▂▁

library(skimr)

# Bar chart counting the rows for each value of `rank`.
ggplot(data, aes(x = rank)) +
  geom_bar()

# Histogram of `carat` in the `diamonds` data set, using bins 0.5 wide.
# The aesthetic must be written `x = carat`: `x+carat` is an arithmetic
# expression on an undefined `x`, not a mapping, and fails when the plot
# is built.
diamonds %>%
  ggplot(mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.5)

# Histogram of `carat` (bins 0.5 wide) using only rows with carat below 3.
ggplot(filter(diamonds, carat < 3), aes(x = carat)) +
  geom_histogram(binwidth = 0.5)

# Frequency polygons of `carat`, drawn as one coloured line per `cut`.
# No binwidth is supplied, so stat_bin() uses its default of 30 bins.
ggplot(diamonds, aes(x = carat, color = cut)) +
  geom_freqpoly()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing Values