datatib %>%
filter(NAICS_Decription=="Construction") %>%
arrange(desc(Employment))
## # A tibble: 437 x 3
## Congressional_District NAICS_Decription Employment
## <fct> <fct> <int>
## 1 LA 6 Construction 58288
## 2 NY 12 Construction 38877
## 3 TX 24 Construction 34315
## 4 TX 2 Construction 33409
## 5 TX 36 Construction 29717
## 6 TX 7 Construction 28496
## 7 TX 29 Construction 28021
## 8 TX 21 Construction 27954
## 9 CO 7 Construction 27498
## 10 PA 18 Construction 26692
## # ... with 427 more rows
LA 6 district has the most jobs in construction.
datatib %>%
filter(Congressional_District=="NY 10") %>%
arrange(desc(Employment))
## # A tibble: 19 x 3
## Congressional_District NAICS_Decription Employment
## <fct> <fct> <int>
## 1 NY 10 Finance and insurance 109148
## 2 NY 10 Health care and social assistance 102966
## 3 NY 10 Professional, scientific, and techni… 96054
## 4 NY 10 Accommodation and food services 81321
## 5 NY 10 Educational services 64752
## 6 NY 10 Retail trade 56372
## 7 NY 10 Information 53708
## 8 NY 10 Administrative and support and waste 47566
## 9 NY 10 Other services (except public admini… 40511
## 10 NY 10 Wholesale trade 25544
## 11 NY 10 Arts, entertainment, and recreation 24273
## 12 NY 10 Real estate and rental and leasing 22053
## 13 NY 10 Management of companies and enterpri… 20463
## 14 NY 10 Construction 15143
## 15 NY 10 Manufacturing 11789
## 16 NY 10 Transportation and warehousing 10830
## 17 NY 10 Utilities 916
## 18 NY 10 Industries not classified 179
## 19 NY 10 Mining, quarrying, and oil and gas e… 0
In New York’s 10th congressional district, finance and insurance has the highest employment (109148), and mining, quarrying, and oil and gas extraction has the lowest employment (0).
datatib2<-datatib %>%
rename(CD=Congressional_District)
## # A tibble: 8,722 x 3
## CD NAICS_Decription Employment
## <fct> <fct> <int>
## 1 AK 1 Agriculture, forestry, fishing and hunting 819
## 2 AK 1 Mining, quarrying, and oil and gas extraction 13566
## 3 AK 1 Utilities 2081
## 4 AK 1 Construction 18581
## 5 AK 1 Manufacturing 12294
## 6 AK 1 Wholesale trade 9125
## 7 AK 1 Retail trade 35221
## 8 AK 1 Transportation and warehousing 18736
## 9 AK 1 Information 6711
## 10 AK 1 Finance and insurance 7340
## # ... with 8,712 more rows
datatib3<-separate(datatib2, CD, into=c("state", "congressional district"))
## # A tibble: 8,722 x 4
## state `congressional district` NAICS_Decription Employment
## * <chr> <chr> <fct> <int>
## 1 AK 1 Agriculture, forestry, fishi… 819
## 2 AK 1 Mining, quarrying, and oil a… 13566
## 3 AK 1 Utilities 2081
## 4 AK 1 Construction 18581
## 5 AK 1 Manufacturing 12294
## 6 AK 1 Wholesale trade 9125
## 7 AK 1 Retail trade 35221
## 8 AK 1 Transportation and warehousi… 18736
## 9 AK 1 Information 6711
## 10 AK 1 Finance and insurance 7340
## # ... with 8,712 more rows
datatib3 %>%
unite(CD_united, "state", "congressional district", sep=" ")
## # A tibble: 8,722 x 3
## CD_united NAICS_Decription Employment
## * <chr> <fct> <int>
## 1 AK 1 Agriculture, forestry, fishing and hunting 819
## 2 AK 1 Mining, quarrying, and oil and gas extraction 13566
## 3 AK 1 Utilities 2081
## 4 AK 1 Construction 18581
## 5 AK 1 Manufacturing 12294
## 6 AK 1 Wholesale trade 9125
## 7 AK 1 Retail trade 35221
## 8 AK 1 Transportation and warehousing 18736
## 9 AK 1 Information 6711
## 10 AK 1 Finance and insurance 7340
## # ... with 8,712 more rows
datatib5<-select(datatib, Congressional_District, NAICS_Decription)
datatib5$mergeid<-seq(1,8722,1)
## # A tibble: 8,722 x 3
## Congressional_District NAICS_Decription mergeid
## <fct> <fct> <dbl>
## 1 AK 1 Agriculture, forestry, fishing and hunt… 1.00
## 2 AK 1 Mining, quarrying, and oil and gas extr… 2.00
## 3 AK 1 Utilities 3.00
## 4 AK 1 Construction 4.00
## 5 AK 1 Manufacturing 5.00
## 6 AK 1 Wholesale trade 6.00
## 7 AK 1 Retail trade 7.00
## 8 AK 1 Transportation and warehousing 8.00
## 9 AK 1 Information 9.00
## 10 AK 1 Finance and insurance 10.0
## # ... with 8,712 more rows
datatib6<-select(datatib, Congressional_District, Employment)
datatib6$mergeid<-seq(1,8722,1)
## # A tibble: 8,722 x 3
## Congressional_District Employment mergeid
## <fct> <int> <dbl>
## 1 AK 1 819 1.00
## 2 AK 1 13566 2.00
## 3 AK 1 2081 3.00
## 4 AK 1 18581 4.00
## 5 AK 1 12294 5.00
## 6 AK 1 9125 6.00
## 7 AK 1 35221 7.00
## 8 AK 1 18736 8.00
## 9 AK 1 6711 9.00
## 10 AK 1 7340 10.0
## # ... with 8,712 more rows
datatib7<-datatib6 %>%
left_join(datatib5, by="mergeid")
## # A tibble: 8,722 x 5
## Congressional_Di… Employment mergeid Congressional_D… NAICS_Decription
## <fct> <int> <dbl> <fct> <fct>
## 1 AK 1 819 1.00 AK 1 Agriculture, for…
## 2 AK 1 13566 2.00 AK 1 Mining, quarryin…
## 3 AK 1 2081 3.00 AK 1 Utilities
## 4 AK 1 18581 4.00 AK 1 Construction
## 5 AK 1 12294 5.00 AK 1 Manufacturing
## 6 AK 1 9125 6.00 AK 1 Wholesale trade
## 7 AK 1 35221 7.00 AK 1 Retail trade
## 8 AK 1 18736 8.00 AK 1 Transportation a…
## 9 AK 1 6711 9.00 AK 1 Information
## 10 AK 1 7340 10.0 AK 1 Finance and insu…
## # ... with 8,712 more rows
datatib7 %>%
group_by(NAICS_Decription) %>%
summarise(avgemp=mean(Employment,na.rm=TRUE)) %>%
arrange(desc(avgemp))
## # A tibble: 20 x 2
## NAICS_Decription avgemp
## <fct> <dbl>
## 1 Health care and social assistance 44059
## 2 Retail trade 36221
## 3 Accommodation and food services 30366
## 4 Manufacturing 26725
## 5 Professional, scientific, and technical 19264
## 6 Administrative and support and waste 18205
## 7 Finance and insurance 13783
## 8 Construction 13691
## 9 Wholesale trade 13617
## 10 Other services (except public administration) 12395
## 11 Transportation and warehousing 10539
## 12 Educational services 8362
## 13 Information 7738
## 14 Management of companies and enterprises 7549
## 15 Arts, entertainment, and recreation 5112
## 16 Real estate and rental and leasing 4748
## 17 Mining, quarrying, and oil and gas extraction 1505
## 18 Utilities 1182
## 19 Agriculture, forestry, fishing and hunting 366
## 20 Industries not classified 57.1
datatib9<-datatib %>%
spread(Congressional_District, Employment)
## # A tibble: 20 x 438
## NAICS_Decription `AK 1` `AL 1` `AL 2` `AL 3` `AL 4` `AL 5` `AL 6`
## * <fct> <int> <int> <int> <int> <int> <int> <int>
## 1 Accommodation and foo… 28112 29067 24892 21075 17891 27023 26588
## 2 Administrative and su… 17628 15559 13053 10887 9185 12414 19994
## 3 Agriculture, forestry… 819 1196 1762 688 879 189 574
## 4 Arts, entertainment, … 4639 2996 3753 1117 966 2472 4495
## 5 Construction 18581 15516 8957 6979 6824 10506 15028
## 6 Educational services 3025 4882 3821 3288 701 3622 5399
## 7 Finance and insurance 7340 7760 6992 4581 5792 6630 21357
## 8 Health care and socia… 49240 30929 36033 26115 29705 38179 29929
## 9 Industries not classi… 76 28 89 26 60 30 33
## 10 Information 6711 3997 3416 1968 1960 5520 10993
## 11 Management of compani… 7821 3262 2212 505 1310 1732 6600
## 12 Manufacturing 12294 29097 27629 37234 49061 39130 17105
## 13 Mining, quarrying, an… 13566 569 228 616 3123 135 1740
## 14 Other services (excep… 9867 12075 11003 7888 7898 11782 14806
## 15 Professional, scienti… 18662 11597 9794 4620 3655 38966 12408
## 16 Real estate and renta… 4455 4597 2952 1967 1825 2846 3720
## 17 Retail trade 35221 38212 35182 26608 27451 36025 40130
## 18 Transportation and wa… 18736 9270 11036 5974 6447 5512 3922
## 19 Utilities 2081 1903 3278 1022 1435 128 0
## 20 Wholesale trade 9125 9735 10589 5673 6239 9194 11905
## # ... with 430 more variables: `AL 7` <int>, `AR 1` <int>, `AR 2` <int>,
## # `AR 3` <int>, `AR 4` <int>, `AZ 1` <int>, `AZ 2` <int>, `AZ 3` <int>,
## # `AZ 4` <int>, `AZ 5` <int>, `AZ 6` <int>, `AZ 7` <int>, `AZ 8` <int>,
## # `AZ 9` <int>, `CA 1` <int>, `CA 10` <int>, `CA 11` <int>, `CA
## # 12` <int>, `CA 13` <int>, `CA 14` <int>, `CA 15` <int>, `CA 16` <int>,
## # `CA 17` <int>, `CA 18` <int>, `CA 19` <int>, `CA 2` <int>, `CA
## # 20` <int>, `CA 21` <int>, `CA 22` <int>, `CA 23` <int>, `CA 24` <int>,
## # `CA 25` <int>, `CA 26` <int>, `CA 27` <int>, `CA 28` <int>, `CA
## # 29` <int>, `CA 3` <int>, `CA 30` <int>, `CA 31` <int>, `CA 32` <int>,
## # `CA 33` <int>, `CA 34` <int>, `CA 35` <int>, `CA 36` <int>, `CA
## # 37` <int>, `CA 38` <int>, `CA 39` <int>, `CA 4` <int>, `CA 40` <int>,
## # `CA 41` <int>, `CA 42` <int>, `CA 43` <int>, `CA 44` <int>, `CA
## # 45` <int>, `CA 46` <int>, `CA 47` <int>, `CA 48` <int>, `CA 49` <int>,
## # `CA 5` <int>, `CA 50` <int>, `CA 51` <int>, `CA 52` <int>, `CA
## # 53` <int>, `CA 6` <int>, `CA 7` <int>, `CA 8` <int>, `CA 9` <int>, `CO
## # 1` <int>, `CO 2` <int>, `CO 3` <int>, `CO 4` <int>, `CO 5` <int>, `CO
## # 6` <int>, `CO 7` <int>, `CT 1` <int>, `CT 2` <int>, `CT 3` <int>, `CT
## # 4` <int>, `CT 5` <int>, `DC 98` <int>, `DE 1` <int>, `FL 1` <int>, `FL
## # 10` <int>, `FL 11` <int>, `FL 12` <int>, `FL 13` <int>, `FL 14` <int>,
## # `FL 15` <int>, `FL 16` <int>, `FL 17` <int>, `FL 18` <int>, `FL
## # 19` <int>, `FL 2` <int>, `FL 20` <int>, `FL 21` <int>, `FL 22` <int>,
## # `FL 23` <int>, `FL 24` <int>, `FL 25` <int>, `FL 26` <int>, …
datatib10<-datatib9 %>%
gather(Congressional_District, Employment, "AK 1":"WY 1")
## # A tibble: 8,740 x 3
## NAICS_Decription Congressional_Di… Employment
## <fct> <chr> <int>
## 1 Accommodation and food services AK 1 28112
## 2 Administrative and support and waste AK 1 17628
## 3 Agriculture, forestry, fishing and hunting AK 1 819
## 4 Arts, entertainment, and recreation AK 1 4639
## 5 Construction AK 1 18581
## 6 Educational services AK 1 3025
## 7 Finance and insurance AK 1 7340
## 8 Health care and social assistance AK 1 49240
## 9 Industries not classified AK 1 76
## 10 Information AK 1 6711
## # ... with 8,730 more rows
Thanks for reading :)