# Read the salary records from the Excel workbook, then display them
data <- "Salaries.xlsx" %>%
  read_excel()
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # … with 387 more rows
What occupational industry makes up the majority of those who are considered wealthy?
# Scatter plot of salary against years since the PhD
ggplot(data, aes(x = yrs.since.phd, y = salary)) +
  geom_point()
# Histogram of years since the PhD, using bins one year wide
data %>%
  ggplot() +
  geom_histogram(mapping = aes(x = yrs.since.phd), binwidth = 1)
# Histogram of years of service. The bin width is set explicitly (one year
# per bin, the same width used for the yrs.since.phd histogram) instead of
# relying on the stat_bin() default of 30 bins, which ggplot2 flags with a
# "Pick better value with `binwidth`" message.
data %>%
  ggplot(aes(x = yrs.service)) +
  geom_histogram(binwidth = 1)
data %>%
  # Keep salaries inside the 100000-300000 range and replace every other
  # salary with NA. Rows are not dropped, and the result is only printed:
  # `data` itself is left unmodified.
  mutate(salary = ifelse(salary >= 100000 & salary <= 300000, salary, NA))
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male NA
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male NA
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # … with 387 more rows
# Box plots of years of service across years since the PhD.
# geom_boxplot() needs a grouping for a continuous x; without one it draws a
# single box for the whole data set and warns "Continuous x aesthetic -- did
# you forget aes(group=...)?". cut_width() bins yrs.since.phd into 5-year
# intervals so that one box is drawn per interval.
data %>%
  ggplot(aes(x = yrs.since.phd, y = yrs.service)) +
  geom_boxplot(aes(group = cut_width(yrs.since.phd, 5)))
# Count the rows for each (yrs.since.phd, yrs.service) pair and draw the
# counts as a tile plot, with the fill colour showing the count `n`
data %>%
  count(yrs.since.phd, yrs.service) %>%
  ggplot(mapping = aes(x = yrs.since.phd, y = yrs.service)) +
  geom_tile(mapping = aes(fill = n))
library(hexbin)
# Hexagonal-bin plot of salary against years of service
ggplot(data, aes(x = yrs.service, y = salary)) +
  geom_hex()