# Load packages
library(tidyverse)
# Read the cleaned PUMS extract from disk and store it as a tibble
PUMS_cleaned <- as_tibble(
  read.csv("file:///C:/Users/jzbowman/Downloads/PUMS_cleaned.csv")
)
# Preview the first rows and the column types
PUMS_cleaned
## # A tibble: 67,248 x 6
## X PUMA age education field_of_degree income
## <int> <int> <int> <fct> <fct> <int>
## 1 1 1000 87 lessthanBA <NA> 11800
## 2 2 900 42 lessthanBA <NA> 8800
## 3 3 800 43 BAorhigher English Language 10000
## 4 4 800 43 lessthanBA <NA> 112000
## 5 5 800 14 lessthanBA <NA> NA
## 6 6 800 11 lessthanBA <NA> NA
## 7 7 900 63 lessthanBA <NA> 23900
## 8 8 900 59 BAorhigher Early Childhood Education 34600
## 9 9 900 65 lessthanBA <NA> 9400
## 10 10 300 50 lessthanBA <NA> 18000
## # ... with 67,238 more rows
Each row represents an individual.
character
This is a data frame, since its columns contain a mix of numeric and character data.
Hint: Use View().
# Open the data in the spreadsheet-style viewer to inspect individual rows.
# View() pops up a viewer window, so only call it in an interactive session;
# this keeps the script runnable when it is knitted or executed via Rscript.
if (interactive()) {
  View(PUMS_cleaned)
}
The first individual is an 87-year-old from Rockingham County with less than a BA in education, making $11,800 a year.
# Tally the number of individuals at each education level
PUMS_cleaned %>%
  count(education)
## # A tibble: 2 x 2
## education n
## <fct> <int>
## 1 BAorhigher 18563
## 2 lessthanBA 48685
18563
Hint: Take PUMS_cleaned, pipe it to dplyr::count, and pipe it to dplyr::filter.
# Count individuals per field of degree, then keep only the Finance row
PUMS_cleaned %>%
  dplyr::count(field_of_degree) %>%
  dplyr::filter(field_of_degree == "Finance")
## # A tibble: 1 x 2
## field_of_degree n
## <fct> <int>
## 1 Finance 185
185
# Histogram of income on a log10 x-axis, with tick labels shown as dollars
ggplot(data = PUMS_cleaned, mapping = aes(x = income)) +
  geom_histogram() +
  scale_x_log10(labels = scales::dollar_format())
# Rank fields of degree by median income, highest first.
# na.rm = TRUE: income contains NA values (visible in the data preview), and
# without it median() returns NA for every group that has a missing income,
# which pushes that group to the bottom of the ranking.
PUMS_cleaned %>%
  group_by(field_of_degree) %>%
  summarise(med_inc = median(income, na.rm = TRUE)) %>%
  arrange(desc(med_inc))
## # A tibble: 169 x 2
## field_of_degree med_inc
## <fct> <dbl>
## 1 Petroleum Engineering 188000
## 2 Materials Science 154800
## 3 Nuclear Engineering 148300
## 4 Physical Sciences 129000
## 5 Mechanical Engineering Related Technologies 111900
## 6 Pharmacy Pharmaceutical Sciences 106700
## 7 Biological Engineering 101800
## 8 Metallurgical Engineering 99700
## 9 Naval Architecture 97000
## 10 Electrical Engineering 94000
## # ... with 159 more rows
The field of degree with the highest median income is Petroleum Engineering, at $188,000 a year.