library(dplyr)
library(stringr)

Final Project

1) Objective:

To create a Shiny App/visualization that displays the relationship between different occupations and salary for many years (2007 to 2015).

2) Description:

In this project we would like to find those occupations that pay higher wages and salaries.

3) Data Source:

Datasets related to occupation and wage estimates are available in the public domain and can be downloaded from the following site for the years 2007 to 2015:
  • U.S. Bureau of Labor Statistics (https://www.bls.gov/oes/tables.htm)
4) Research Questions:

    The following questions will be addressed by this project:
  • What are the top 30 occupations reported (for all years)
  • For a specific year, find the job title that was paid the highest in a specific occupation
  • Find the salary history of a job title to determine if it is increasing or decreasing
5) Shiny App:

    The shiny app consists of the following tabs.
  • Top Occupations/All Categories
  • Top Occupations/Per Category
  • Salary History/Occupation
6) Data Cleanup/Preparation:

    The following functions are used to prepare data for the Shiny app. All the CSV files for the years 2007 to 2015 are read and cleaned.

    7) CSV Files:

    This process generates the following CSV files that are used by the Shiny app:
  • occupation_df.csv
  • salary_major_df.csv
  • salary_total_df.csv
  • title_df.csv
  • year_df.csv

    # Function to convert the first character of each word to uppercase
    simpleCap <- function(x) {
      # x: a single character string; words are separated by single spaces.
      # Returns the string with the first character of every word uppercased
      # and the rest of each word left unchanged.

      # A missing value used to come back as the literal text "NANA"
      # (paste() stringifies NA); keep it missing instead.
      if (length(x) == 1L && is.na(x)) {
        return(NA_character_)
      }
      words <- strsplit(x, " ")[[1]]
      paste0(toupper(substring(words, 1, 1)), substring(words, 2), collapse = " ")
    }
    
    # Function to clean the data
    #
    # salary_year_df: raw data frame for one year, as read from the input CSV.
    # Returns the data frame restricted to the columns whose names match
    # OCC_CODE, TITLE, OCC_GROUP, TOTAL_EMPLOYEES, SALARY or YEAR, with:
    #   * "-" replaced by "_" in OCC_CODE, and older codes recoded (see table),
    #   * TITLE capitalized word by word, "*" removed, one title renamed,
    #   * TOTAL_EMPLOYEES and SALARY converted to numeric (commas stripped),
    #   * a new OCC_CODE_SHORT column (first two characters of OCC_CODE),
    #   * rows sorted by descending SALARY.
    getCleanData <- function(salary_year_df) {
      salary_year_df <- select(
        salary_year_df,
        matches("OCC_CODE|TITLE|OCC_GROUP|TOTAL_EMPLOYEES|SALARY|YEAR")
      )
      salary_year_df$OCC_CODE <- gsub("\\-", "_", salary_year_df$OCC_CODE)

      # vapply() always yields a plain, unnamed character vector; sapply()
      # attached the titles as names and changes type on empty input.
      salary_year_df$TITLE <- vapply(
        salary_year_df$TITLE, simpleCap, character(1), USE.NAMES = FALSE
      )

      # Strip thousands separators before converting to numbers.
      salary_year_df$TOTAL_EMPLOYEES <- as.numeric(
        gsub(",", "", salary_year_df$TOTAL_EMPLOYEES)
      )
      salary_year_df$SALARY <- as.numeric(gsub(",", "", salary_year_df$SALARY))

      salary_year_df$TITLE <- gsub("\\*", "", salary_year_df$TITLE)
      salary_year_df$TITLE <- gsub(
        "Computer And Information Scientists, Research",
        "Computer And Information Research Scientists",
        salary_year_df$TITLE
      )

      # Old code -> replacement code, applied in this order.
      # NOTE(review): the original list also contained "15_1199" -> "15_1199",
      # which changes nothing and is omitted here; it may have been a typo for
      # a different source code - confirm against the occupation code tables.
      occ_code_recodes <- c(
        "29_1121" = "29_1181",
        "21_1099" = "21_1798",
        "15_1011" = "15_1111",
        "15_1021" = "15_1131",
        "15_1041" = "15_1150",
        "15_1051" = "15_1121",
        "47_4099" = "47_4799",
        "15_1061" = "15_1141",
        "51_5022" = "51_5111",
        "51_9199" = "51_9399",
        "29_2034" = "29_2037",
        "29_1111" = "29_1141",
        "41_9099" = "41_9799",
        "25_3099" = "25_3999",
        "11_3042" = "11_3131",
        "13_1073" = "13_1151",
        "39_6022" = "39_7012"
      )
      for (old_code in names(occ_code_recodes)) {
        # The codes contain no regex metacharacters, so a fixed match is
        # equivalent to the original pattern match.
        salary_year_df$OCC_CODE <- gsub(
          old_code, occ_code_recodes[[old_code]], salary_year_df$OCC_CODE,
          fixed = TRUE
        )
      }

      salary_year_df$OCC_CODE_SHORT <- str_sub(
        as.character(salary_year_df$OCC_CODE), 1, 2
      )
      arrange(salary_year_df, desc(SALARY))
    }
    
    # Return the first 20 entries (rows, for a data frame) of the input
    get20Lines <- function(salary_year_df) {
      first_twenty <- head(salary_year_df, n = 20)
      first_twenty
    }
    
    # Function to print the dataset
    #
    # salary_year_df: data frame to inspect.
    # numberofRows:   number of leading rows to return.
    # Reports the column and row counts as a message and returns
    # head(salary_year_df, numberofRows), which auto-prints when the function
    # is called at top level.
    printDataFrameInfo <- function(salary_year_df, numberofRows) {
      # Bare ncol()/nrow() calls inside a function are evaluated and thrown
      # away, so the dimensions were never shown; emit them explicitly.
      message(
        "Columns: ", ncol(salary_year_df),
        ", rows: ", nrow(salary_year_df)
      )
      head(salary_year_df, numberofRows)
    }
    
    # Build the lookup table of major occupation groups.
    #
    # Keeps the rows whose OCC_GROUP is "major", removes " Occupations" from
    # the titles, keeps the first row per OCC_CODE and returns the code/title
    # columns sorted by TITLE.
    getOccupationData <- function(salary_total_df) {
      salary_total_df %>%
        filter(OCC_GROUP == "major") %>%
        mutate(TITLE = gsub(" Occupations", "", TITLE)) %>%
        arrange(OCC_CODE) %>%
        distinct(OCC_CODE, .keep_all = TRUE) %>%
        arrange(TITLE) %>%
        select(matches("OCC_CODE|TITLE|OCC_CODE_SHORT"))
    }
    
    # Build the lookup table of individual occupation titles.
    #
    # Keeps the rows whose OCC_GROUP is not "major", reduces them to the
    # code/title columns, keeps the first row per OCC_CODE and sorts by TITLE.
    getTitleData <- function(salary_total_df) {
      salary_total_df %>%
        filter(OCC_GROUP != "major") %>%
        select(matches("OCC_CODE|TITLE|OCC_CODE_SHORT")) %>%
        distinct(OCC_CODE, .keep_all = TRUE) %>%
        arrange(TITLE)
    }
    
    
    # Build the year lookup table: one row per distinct YEAR, with the year
    # stored twice, as YEAR_KEY and as YEAR_VALUE.
    getYearData <- function(salary_total_df) {
      years <- distinct(salary_total_df, YEAR)
      colnames(years) <- "YEAR_KEY"
      years$YEAR_VALUE <- years$YEAR_KEY
      years
    }
    
    # Step 1: Read the data from different files
    # Reads DATA/CSV/INPUT/SAL_<year>.csv with a header row; strings stay
    # character. `header` is spelled out in full: the original `head = TRUE`
    # only worked through partial argument matching.
    readSalaryCsv <- function(year) {
      read.csv(
        file = sprintf("DATA/CSV/INPUT/SAL_%d.csv", year),
        header = TRUE, sep = ",", stringsAsFactors = FALSE
      )
    }
    salary_2007_df <- readSalaryCsv(2007L)
    salary_2008_df <- readSalaryCsv(2008L)
    salary_2009_df <- readSalaryCsv(2009L)
    salary_2010_df <- readSalaryCsv(2010L)
    salary_2011_df <- readSalaryCsv(2011L)
    salary_2012_df <- readSalaryCsv(2012L)
    salary_2013_df <- readSalaryCsv(2013L)
    salary_2014_df <- readSalaryCsv(2014L)
    salary_2015_df <- readSalaryCsv(2015L)
    
    # Step 2: Run every yearly data frame through getCleanData(), replacing
    # the raw version with the cleaned one
    salary_2007_df <- getCleanData(salary_year_df = salary_2007_df)
    salary_2008_df <- getCleanData(salary_year_df = salary_2008_df)
    salary_2009_df <- getCleanData(salary_year_df = salary_2009_df)
    salary_2010_df <- getCleanData(salary_year_df = salary_2010_df)
    salary_2011_df <- getCleanData(salary_year_df = salary_2011_df)
    salary_2012_df <- getCleanData(salary_year_df = salary_2012_df)
    salary_2013_df <- getCleanData(salary_year_df = salary_2013_df)
    salary_2014_df <- getCleanData(salary_year_df = salary_2014_df)
    salary_2015_df <- getCleanData(salary_year_df = salary_2015_df)
    
    # Step 3: Inspect the cleaned data frames, starting with the first
    # 20 rows of the 2007 data
    printDataFrameInfo(salary_2007_df, numberofRows = 20)
       OCC_CODE                                          TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                              Anesthesiologists                     31030 192780 2007             29
    2   29_1067                                       Surgeons                     50260 191410 2007             29
    3   29_1023                                  Orthodontists                      5350 185340 2007             29
    4   29_1064                Obstetricians And Gynecologists                     21340 183600 2007             29
    5   29_1022                Oral And Maxillofacial Surgeons                      5040 178440 2007             29
    6   29_1024                                Prosthodontists                       380 169360 2007             29
    7   29_1063                            Internists, General                     46260 167270 2007             29
    8   29_1069             Physicians And Surgeons, All Other                    237400 155150 2007             29
    9   29_1062               Family And General Practitioners                    113250 153640 2007             29
    10  11_1011                               Chief Executives                    299160 151370 2007             11
    11  29_1066                                  Psychiatrists                     21790 147620 2007             29
    12  29_1021                              Dentists, General                     85260 147010 2007             29
    13  29_1065                         Pediatricians, General                     28890 145210 2007             29
    14  29_1029                Dentists, All Other Specialists                      4490 120360 2007             29
    15  29_1081                                    Podiatrists                      9320 119790 2007             29
    16  23_1011                                        Lawyers                    555770 118280 2007             23
    17  11_9041                           Engineering Managers                    184410 115610 2007             11
    18  53_2011 Airline Pilots, Copilots, And Flight Engineers                     78250 113940 2007             53
    19  17_2171                            Petroleum Engineers                     16060 113890 2007             17
    20  11_3021      Computer And Information Systems Managers                    264990 113880 2007             11
    # Preview the first 20 rows of the cleaned 2008 data
    printDataFrameInfo(salary_2008_df, numberofRows = 20)
       OCC_CODE                                          TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1067                                       Surgeons                     47070 206770 2008             29
    2   29_1061                              Anesthesiologists                     34230 197570 2008             29
    3   29_1023                                  Orthodontists                      5500 194930 2008             29
    4   29_1064                Obstetricians And Gynecologists                     19750 192780 2008             29
    5   29_1022                Oral And Maxillofacial Surgeons                      4760 190420 2008             29
    6   29_1063                            Internists, General                     46980 176740 2008             29
    7   29_1024                                Prosthodontists                       370 169810 2008             29
    8   29_1069             Physicians And Surgeons, All Other                    262850 165000 2008             29
    9   29_1062               Family And General Practitioners                    106210 161490 2008             29
    10  11_1011                               Chief Executives                    301930 160440 2008             11
    11  29_1021                              Dentists, General                     85910 154270 2008             29
    12  29_1066                                  Psychiatrists                     22140 154050 2008             29
    13  29_1065                         Pediatricians, General                     29170 153370 2008             29
    14  29_1029                Dentists, All Other Specialists                      4770 142070 2008             29
    15  29_1081                                    Podiatrists                      9670 125760 2008             29
    16  23_1011                                        Lawyers                    553690 124750 2008             23
    17  11_9121                      Natural Sciences Managers                     43060 123140 2008             11
    18  11_9041                           Engineering Managers                    182300 120580 2008             11
    19  53_2011 Airline Pilots, Copilots, And Flight Engineers                     77090 119750 2008             53
    20  17_2171                            Petroleum Engineers                     20880 119140 2008             17
    # Preview the first 20 rows of the cleaned 2009 data
    printDataFrameInfo(salary_2009_df, numberofRows = 20)
       OCC_CODE                                     TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1067                                  Surgeons                     44560 219770 2009             29
    2   29_1061                         Anesthesiologists                     37450 211750 2009             29
    3   29_1022           Oral And Maxillofacial Surgeons                      5390 210710 2009             29
    4   29_1023                             Orthodontists                      5410 206190 2009             29
    5   29_1064           Obstetricians And Gynecologists                     20380 204470 2009             29
    6   29_1063                       Internists, General                     48270 183990 2009             29
    7   29_1069        Physicians And Surgeons, All Other                    274160 173860 2009             29
    8   29_1062          Family And General Practitioners                     99000 168550 2009             29
    9   11_1011                          Chief Executives                    297640 167280 2009             11
    10  29_1066                             Psychiatrists                     22210 163660 2009             29
    11  29_1065                    Pediatricians, General                     29460 161410 2009             29
    12  29_1021                         Dentists, General                     86270 156850 2009             29
    13  29_1029           Dentists, All Other Specialists                      5010 153570 2009             29
    14  29_1081                               Podiatrists                      9720 131730 2009             29
    15  23_1011                                   Lawyers                    556790 129020 2009             23
    16  11_9121                 Natural Sciences Managers                     44180 127000 2009             11
    17  29_1024                           Prosthodontists                       660 125400 2009             29
    18  11_9041                      Engineering Managers                    178110 122810 2009             11
    19  11_3021 Computer And Information Systems Managers                    287210 120640 2009             11
    20  11_2021                        Marketing Managers                    169330 120070 2009             11
    # Preview the first 20 rows of the cleaned 2010 data
    printDataFrameInfo(salary_2010_df, numberofRows = 20)
       OCC_CODE                                     TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1067                                  Surgeons                     43230 225390 2010             29
    2   29_1061                         Anesthesiologists                     34820 220100 2010             29
    3   29_1022           Oral And Maxillofacial Surgeons                      5330 214120 2010             29
    4   29_1064           Obstetricians And Gynecologists                     19940 210340 2010             29
    5   29_1023                             Orthodontists                      5580 200290 2010             29
    6   29_1063                       Internists, General                     50070 189480 2010             29
    7   29_1069        Physicians And Surgeons, All Other                    293740 180870 2010             29
    8   29_1062          Family And General Practitioners                     97820 173860 2010             29
    9   11_1011                          Chief Executives                    273500 173350 2010             11
    10  29_1066                             Psychiatrists                     22690 167610 2010             29
    11  29_1065                    Pediatricians, General                     30100 165720 2010             29
    12  29_1029           Dentists, All Other Specialists                      5010 162190 2010             29
    13  29_1021                         Dentists, General                     87700 158770 2010             29
    14  29_1024                           Prosthodontists                       670 139620 2010             29
    15  29_1081                               Podiatrists                      9310 133410 2010             29
    16  23_1011                                   Lawyers                    561350 129440 2010             23
    17  11_9121                 Natural Sciences Managers                     45920 129320 2010             11
    18  17_2171                       Petroleum Engineers                     28210 127970 2010             17
    19  11_9041    Architectural And Engineering Managers                    174720 125900 2010             11
    20  11_3021 Computer And Information Systems Managers                    288660 123280 2010             11
    # Preview the first 20 rows of the cleaned 2011 data
    printDataFrameInfo(salary_2011_df, numberofRows = 20)
       OCC_CODE                                  TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                      Anesthesiologists                     33310 234950 2011             29
    2   29_1067                               Surgeons                     42340 231550 2011             29
    3   29_1064        Obstetricians And Gynecologists                     20540 218610 2011             29
    4   29_1022        Oral And Maxillofacial Surgeons                      5800 217380 2011             29
    5   29_1023                          Orthodontists                      5040 204670 2011             29
    6   29_1063                    Internists, General                     46740 189210 2011             29
    7   29_1069     Physicians And Surgeons, All Other                    305590 184650 2011             29
    8   29_1062       Family And General Practitioners                    101800 177330 2011             29
    9   11_1011                       Chief Executives                    267370 176550 2011             11
    10  29_1066                          Psychiatrists                     23140 174170 2011             29
    11  29_1065                 Pediatricians, General                     29640 168650 2011             29
    12  29_1029        Dentists, All Other Specialists                      4850 168000 2011             29
    13  29_1021                      Dentists, General                     90950 161750 2011             29
    14  17_2171                    Petroleum Engineers                     30880 138980 2011             17
    15  29_1081                            Podiatrists                      9210 133870 2011             29
    16  29_1024                        Prosthodontists                       560 130820 2011             29
    17  23_1011                                Lawyers                    570950 130490 2011             23
    18  11_9041 Architectural And Engineering Managers                    184530 129350 2011             11
    19  11_9121              Natural Sciences Managers                     47510 128230 2011             11
    20  11_2021                     Marketing Managers                    168410 126190 2011             11
    # Preview the first 20 rows of the cleaned 2012 data
    printDataFrameInfo(salary_2012_df, numberofRows = 20)
       OCC_CODE                                  TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                      Anesthesiologists                     29930 232830 2012             29
    2   29_1067                               Surgeons                     42410 230540 2012             29
    3   29_1064        Obstetricians And Gynecologists                     20880 216760 2012             29
    4   29_1022        Oral And Maxillofacial Surgeons                      4990 216440 2012             29
    5   29_1063                    Internists, General                     45210 191520 2012             29
    6   29_1023                          Orthodontists                      5530 186320 2012             29
    7   29_1069     Physicians And Surgeons, All Other                    308410 184820 2012             29
    8   29_1062       Family And General Practitioners                    110050 180850 2012             29
    9   29_1066                          Psychiatrists                     24210 177520 2012             29
    10  11_1011                       Chief Executives                    255940 176840 2012             11
    11  29_1024                        Prosthodontists                       310 168120 2012             29
    12  29_1065                 Pediatricians, General                     30560 167640 2012             29
    13  29_1029        Dentists, All Other Specialists                      5150 164780 2012             29
    14  29_1021                      Dentists, General                     93580 163240 2012             29
    15  29_1151                     Nurse Anesthetists                     34180 154390 2012             29
    16  17_2171                    Petroleum Engineers                     36410 147470 2012             17
    17  11_9041 Architectural And Engineering Managers                    187640 133240 2012             11
    18  29_1081                            Podiatrists                      9090 132470 2012             29
    19  23_1011                                Lawyers                    581920 130880 2012             23
    20  11_9121              Natural Sciences Managers                     48560 130400 2012             11
    # Preview the first 20 rows of the cleaned 2013 data
    printDataFrameInfo(salary_2013_df, numberofRows = 20)
       OCC_CODE                                     TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                         Anesthesiologists                     30200 235070 2013             29
    2   29_1067                                  Surgeons                     41030 233150 2013             29
    3   29_1022           Oral And Maxillofacial Surgeons                      5280 218960 2013             29
    4   29_1064           Obstetricians And Gynecologists                     21730 212570 2013             29
    5   29_1023                             Orthodontists                      5570 196270 2013             29
    6   29_1063                       Internists, General                     46410 188440 2013             29
    7   29_1069        Physicians And Surgeons, All Other                    307220 187200 2013             29
    8   29_1062          Family And General Practitioners                    120860 183940 2013             29
    9   29_1066                             Psychiatrists                     25040 182660 2013             29
    10  11_1011                          Chief Executives                    248760 178400 2013             11
    11  29_1065                    Pediatricians, General                     30890 170530 2013             29
    12  29_1029           Dentists, All Other Specialists                      5160 170340 2013             29
    13  29_1021                         Dentists, General                     96000 164570 2013             29
    14  29_1151                        Nurse Anesthetists                     35430 157690 2013             29
    15  17_2171                       Petroleum Engineers                     34910 149180 2013             17
    16  11_9041    Architectural And Engineering Managers                    183430 136540 2013             11
    17  29_1081                               Podiatrists                      8850 135070 2013             29
    18  11_2021                        Marketing Managers                    174010 133700 2013             11
    19  11_9121                 Natural Sciences Managers                     51900 132850 2013             11
    20  11_3021 Computer And Information Systems Managers                    319080 132570 2013             11
    # Preview the first 20 rows of the cleaned 2014 data
    printDataFrameInfo(salary_2014_df, numberofRows = 20)
       OCC_CODE                                  TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                      Anesthesiologists                     30060 246320 2014             29
    2   29_1067                               Surgeons                     41070 240440 2014             29
    3   29_1022        Oral And Maxillofacial Surgeons                      5120 219600 2014             29
    4   29_1064        Obstetricians And Gynecologists                     21740 214750 2014             29
    5   29_1023                          Orthodontists                      6190 201030 2014             29
    6   29_1063                    Internists, General                     48390 190530 2014             29
    7   29_1069     Physicians And Surgeons, All Other                    311320 189760 2014             29
    8   29_1062       Family And General Practitioners                    124810 186320 2014             29
    9   29_1066                          Psychiatrists                     25080 182700 2014             29
    10  11_1011                       Chief Executives                    246240 180700 2014             11
    11  29_1065                 Pediatricians, General                     31010 175400 2014             29
    12  29_1029        Dentists, All Other Specialists                      5450 168580 2014             29
    13  29_1021                      Dentists, General                     97990 166810 2014             29
    14  29_1151                     Nurse Anesthetists                     36590 158900 2014             29
    15  17_2171                    Petroleum Engineers                     33740 147520 2014             17
    16  29_1024                        Prosthodontists                       630 142830 2014             29
    17  11_9041 Architectural And Engineering Managers                    179320 138720 2014             11
    18  29_1081                            Podiatrists                      8910 137480 2014             29
    19  11_2021                     Marketing Managers                    184490 137400 2014             11
    20  11_9121              Natural Sciences Managers                     53290 136450 2014             11
    # Preview the first 20 rows of the cleaned 2015 data
    printDataFrameInfo(salary_2015_df, numberofRows = 20)
       OCC_CODE                                     TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                         Anesthesiologists                     29220 258100 2015             29
    2   29_1067                                  Surgeons                     41600 247520 2015             29
    3   29_1022           Oral And Maxillofacial Surgeons                      5000 233900 2015             29
    4   29_1064           Obstetricians And Gynecologists                     20090 222400 2015             29
    5   29_1023                             Orthodontists                      5410 221390 2015             29
    6   29_1069        Physicians And Surgeons, All Other                    322740 197700 2015             29
    7   29_1063                       Internists, General                     48920 196520 2015             29
    8   29_1066                             Psychiatrists                     24060 193680 2015             29
    9   29_1062          Family And General Practitioners                    127430 192120 2015             29
    10  11_1011                          Chief Executives                    238940 185850 2015             11
    11  29_1065                    Pediatricians, General                     28660 183180 2015             29
    12  29_1021                         Dentists, General                    100080 172350 2015             29
    13  29_1029           Dentists, All Other Specialists                      5550 171040 2015             29
    14  29_1024                           Prosthodontists                       710 161020 2015             29
    15  29_1151                        Nurse Anesthetists                     39410 160250 2015             29
    16  17_2171                       Petroleum Engineers                     34600 149590 2015             17
    17  11_9041    Architectural And Engineering Managers                    179770 141650 2015             11
    18  11_3021 Computer And Information Systems Managers                    341250 141000 2015             11
    19  11_2021                        Marketing Managers                    192890 140660 2015             11
    20  11_9121                 Natural Sciences Managers                     53450 136570 2015             11
    # Stack the nine cleaned yearly data frames (2007 to 2015) row-wise into
    # one data frame, then preview its first 20 rows
    salary_total_df <- rbind(
      salary_2007_df, salary_2008_df, salary_2009_df,
      salary_2010_df, salary_2011_df, salary_2012_df,
      salary_2013_df, salary_2014_df, salary_2015_df
    )
    printDataFrameInfo(salary_total_df, numberofRows = 20)
       OCC_CODE                                          TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   29_1061                              Anesthesiologists                     31030 192780 2007             29
    2   29_1067                                       Surgeons                     50260 191410 2007             29
    3   29_1023                                  Orthodontists                      5350 185340 2007             29
    4   29_1064                Obstetricians And Gynecologists                     21340 183600 2007             29
    5   29_1022                Oral And Maxillofacial Surgeons                      5040 178440 2007             29
    6   29_1024                                Prosthodontists                       380 169360 2007             29
    7   29_1063                            Internists, General                     46260 167270 2007             29
    8   29_1069             Physicians And Surgeons, All Other                    237400 155150 2007             29
    9   29_1062               Family And General Practitioners                    113250 153640 2007             29
    10  11_1011                               Chief Executives                    299160 151370 2007             11
    11  29_1066                                  Psychiatrists                     21790 147620 2007             29
    12  29_1021                              Dentists, General                     85260 147010 2007             29
    13  29_1065                         Pediatricians, General                     28890 145210 2007             29
    14  29_1029                Dentists, All Other Specialists                      4490 120360 2007             29
    15  29_1081                                    Podiatrists                      9320 119790 2007             29
    16  23_1011                                        Lawyers                    555770 118280 2007             23
    17  11_9041                           Engineering Managers                    184410 115610 2007             11
    18  53_2011 Airline Pilots, Copilots, And Flight Engineers                     78250 113940 2007             53
    19  17_2171                            Petroleum Engineers                     16060 113890 2007             17
    20  11_3021      Computer And Information Systems Managers                    264990 113880 2007             11
    # Keep only the rows whose OCC_GROUP is "major", then preview the first
    # 20 of them
    salary_major_df <- salary_total_df %>%
      filter(OCC_GROUP == "major")
    printDataFrameInfo(salary_major_df, numberofRows = 20)
       OCC_CODE                                                      TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
    1   11_0000                                     Management Occupations     major         6003930  96150 2007             11
    2   23_0000                                          Legal Occupations     major          998590  88450 2007             23
    3   15_0000                      Computer And Mathematical Occupations     major         3191360  72190 2007             15
    4   17_0000                   Architecture And Engineering Occupations     major         2486020  68880 2007             17
    5   29_0000         Healthcare Practitioners And Technical Occupations     major         6877680  65020 2007             29
    6   13_0000              Business And Financial Operations Occupations     major         6015500  62410 2007             13
    7   19_0000             Life, Physical, And Social Science Occupations     major         1255670  62020 2007             19
    8   27_0000 Arts, Design, Entertainment, Sports, And Media Occupations     major         1761270  48410 2007             27
    9   25_0000               Education, Training, And Library Occupations     major         8316360  46610 2007             25
    10  47_0000                    Construction And Extraction Occupations     major         6708200  40620 2007             47
    11  21_0000                  Community And Social Services Occupations     major         1793040  40540 2007             21
    12  49_0000          Installation, Maintenance, And Repair Occupations     major         5390090  39930 2007             49
    13  33_0000                             Protective Service Occupations     major         3087650  38750 2007             33
    14  41_0000                              Sales And Related Occupations     major        14332020  35240 2007             41
    15  51_0000                                     Production Occupations     major        10146560  31310 2007             51
    16  43_0000              Office And Administrative Support Occupations     major        23270810  31200 2007             43
    17  53_0000             Transportation And Material Moving Occupations     major         9629030  30680 2007             53
    18  31_0000                             Healthcare Support Occupations     major         3625240  25600 2007             31
    19  39_0000                      Personal Care And Service Occupations     major         3339510  23980 2007             39
    20  37_0000  Building And Grounds Cleaning And Maintenance Occupations     major         4403900  23560 2007             37
    # Derive the occupation lookup table (OCC_CODE, TITLE, OCC_CODE_SHORT)
    # from the combined salary data, then preview 20 rows of it.
    occupation_df <- getOccupationData(salary_total_df)
    printDataFrameInfo(occupation_df, 20)
       OCC_CODE                                          TITLE OCC_CODE_SHORT
    1   17_0000                   Architecture And Engineering             17
    2   27_0000 Arts, Design, Entertainment, Sports, And Media             27
    3   37_0000  Building And Grounds Cleaning And Maintenance             37
    4   13_0000              Business And Financial Operations             13
    5   21_0000                  Community And Social Services             21
    6   15_0000                      Computer And Mathematical             15
    7   47_0000                    Construction And Extraction             47
    8   25_0000               Education, Training, And Library             25
    9   45_0000                 Farming, Fishing, And Forestry             45
    10  35_0000           Food Preparation And Serving Related             35
    11  29_0000         Healthcare Practitioners And Technical             29
    12  31_0000                             Healthcare Support             31
    13  49_0000          Installation, Maintenance, And Repair             49
    14  23_0000                                          Legal             23
    15  19_0000             Life, Physical, And Social Science             19
    16  11_0000                                     Management             11
    17  43_0000              Office And Administrative Support             43
    18  39_0000                      Personal Care And Service             39
    19  51_0000                                     Production             51
    20  33_0000                             Protective Service             33
    # Derive the job-title lookup table (OCC_CODE, TITLE, OCC_CODE_SHORT)
    # from the combined salary data, then preview 20 rows of it.
    title_df <- getTitleData(salary_total_df)
    printDataFrameInfo(title_df, 20)
       OCC_CODE                                                                TITLE OCC_CODE_SHORT
    1   13_2011                                             Accountants And Auditors             13
    2   27_2011                                                               Actors             27
    3   15_2011                                                            Actuaries             15
    4   23_1021        Administrative Law Judges, Adjudicators, And Hearing Officers             23
    5   11_3011                                     Administrative Services Managers             11
    6   25_3011 Adult Literacy, Remedial Education, And GED Teachers And Instructors             25
    7   11_2011                                  Advertising And Promotions Managers             11
    8   41_3011                                             Advertising Sales Agents             41
    9   17_3021                     Aerospace Engineering And Operations Technicians             17
    10  17_2011                                                  Aerospace Engineers             17
    11  13_1011    Agents And Business Managers Of Artists, Performers, And Athletes             13
    12  19_4011                            Agricultural And Food Science Technicians             19
    13  17_2021                                               Agricultural Engineers             17
    14  45_2091                                     Agricultural Equipment Operators             45
    15  45_2011                                              Agricultural Inspectors             45
    16  25_1041                        Agricultural Sciences Teachers, Postsecondary             25
    17  45_2099                                      Agricultural Workers, All Other             45
    18  53_2021                                              Air Traffic Controllers             53
    19  53_1011                                  Aircraft Cargo Handling Supervisors             53
    20  49_3011                           Aircraft Mechanics And Service Technicians             49
    # Derive the year lookup table (YEAR_KEY, YEAR_VALUE) from the combined
    # salary data, then preview it (up to 20 rows requested).
    year_df <- getYearData(salary_total_df)
    printDataFrameInfo(year_df, 20)
      YEAR_KEY YEAR_VALUE
    1     2007       2007
    2     2008       2008
    3     2009       2009
    4     2010       2010
    5     2011       2011
    6     2012       2012
    7     2013       2013
    8     2014       2014
    9     2015       2015
    # Write all dataframes to CSV files
    # Every file goes to the same directory; define it once and make sure it
    # exists, since write.csv() fails with "cannot open file" otherwise.
    output_dir <- "DATA/CSV/OUTPUT"
    dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
    
    # Write short year datasets for homework #06 to CSVs
    # The per-year data frames are named salary_<year>_df, so look each one up
    # by name instead of repeating the same call nine times. Each is trimmed
    # with get20Lines() and written to salary_<year>.csv.
    for (yr in 2007:2015) {
      year_sample <- get20Lines(get(paste0("salary_", yr, "_df")))
      write.csv(
        year_sample,
        file.path(output_dir, paste0("salary_", yr, ".csv")),
        row.names = FALSE
      )
    }
    
    # Write salary_total_df dataframe to CSV
    write.csv(salary_total_df, file.path(output_dir, "salary_total_df.csv"),
              row.names = FALSE)
    
    # Write salary_major_df dataframe to CSV
    write.csv(salary_major_df, file.path(output_dir, "salary_major_df.csv"),
              row.names = FALSE)
    
    # Write occupation_df dataframe to CSV
    write.csv(occupation_df, file.path(output_dir, "occupation_df.csv"),
              row.names = FALSE)
    
    # Write title_df dataframe to CSV
    write.csv(title_df, file.path(output_dir, "title_df.csv"),
              row.names = FALSE)
    
    # Write year_df dataframe to CSV
    write.csv(year_df, file.path(output_dir, "year_df.csv"),
              row.names = FALSE)