library(dplyr)
library(stringr)
The goal of this project is to create a Shiny app/visualization that displays the relationship between occupations and salaries over the years 2007 to 2015.
In this project we would like to find those occupations that pay higher wages and salaries.
The following functions are used to prepare the data for the Shiny app. All the CSV files for the years 2007 to 2015 are read and cleaned.
# Function to convert the first Character to Uppercase
simpleCap <- function(x) {
  # Takes a single string, splits it on single spaces, upper-cases the first
  # character of every piece and glues the pieces back together with spaces.
  # Characters after the first one of each piece are left as they are.
  words <- strsplit(x, " ")[[1]]
  initials <- toupper(substring(words, 1, 1))
  remainders <- substring(words, 2)
  paste(paste0(initials, remainders), collapse = " ")
}
# Function to clean the data
getCleanData <- function(salary_year_df) {
  # Clean one yearly salary table.
  #
  # Args:
  #   salary_year_df: data frame read from one yearly CSV. It must provide
  #     columns whose names match OCC_CODE, TITLE, OCC_GROUP, TOTAL_EMPLOYEES,
  #     SALARY and YEAR.
  #
  # Returns:
  #   The table restricted to the matched columns, with normalized occupation
  #   codes and titles, numeric TOTAL_EMPLOYEES and SALARY, an added
  #   OCC_CODE_SHORT column (first two characters of OCC_CODE), sorted by
  #   SALARY in descending order.

  # Keep only the columns of interest (matches() does regex matching on the
  # column names).
  salary_year_df <- select(
    salary_year_df,
    matches("OCC_CODE|TITLE|OCC_GROUP|TOTAL_EMPLOYEES|SALARY|YEAR")
  )

  # Titles: capitalize every word, drop "*" markers and unify one title that
  # appears under two different wordings.
  titles <- vapply(
    salary_year_df$TITLE, simpleCap, character(1),
    USE.NAMES = FALSE
  )
  titles <- gsub("\\*", "", titles)
  titles <- gsub(
    "Computer And Information Scientists, Research",
    "Computer And Information Research Scientists",
    titles
  )
  salary_year_df$TITLE <- titles

  # Numeric columns arrive as text with thousands separators. Values that are
  # still not numeric after removing the commas become NA (with a warning).
  salary_year_df$TOTAL_EMPLOYEES <-
    as.numeric(gsub(",", "", salary_year_df$TOTAL_EMPLOYEES))
  salary_year_df$SALARY <- as.numeric(gsub(",", "", salary_year_df$SALARY))

  # Occupation codes: use "_" instead of "-" and map old codes to new ones.
  # NOTE(review): presumably these pairs harmonize codes that changed between
  # classification revisions across the years -- confirm against the data
  # source. The former "15_1199" -> "15_1199" entry was a no-op and is
  # intentionally not listed.
  code_map <- c(
    "29_1121" = "29_1181",
    "21_1099" = "21_1798",
    "15_1011" = "15_1111",
    "15_1021" = "15_1131",
    "15_1041" = "15_1150",
    "15_1051" = "15_1121",
    "47_4099" = "47_4799",
    "15_1061" = "15_1141",
    "51_5022" = "51_5111",
    "51_9199" = "51_9399",
    "29_2034" = "29_2037",
    "29_1111" = "29_1141",
    "41_9099" = "41_9799",
    "25_3099" = "25_3999",
    "11_3042" = "11_3131",
    "13_1073" = "13_1151",
    "39_6022" = "39_7012"
  )
  codes <- gsub("\\-", "_", salary_year_df$OCC_CODE)
  for (old_code in names(code_map)) {
    codes <- gsub(old_code, code_map[[old_code]], codes, fixed = TRUE)
  }
  salary_year_df$OCC_CODE <- codes

  # The first two characters of the code are stored separately as
  # OCC_CODE_SHORT.
  salary_year_df$OCC_CODE_SHORT <- str_sub(as.character(codes), 1, 2)

  arrange(salary_year_df, desc(SALARY))
}
# Function to get the top 20 rows of the dataset
get20Lines <- function(salary_year_df) {
  # Returns the first 20 rows (or elements) of its argument; shorter inputs
  # are returned in full.
  first_rows <- head(salary_year_df, n = 20)
  first_rows
}
# Function to print the dataset
printDataFrameInfo <- function(salary_year_df, numberofRows) {
  # Report the dimensions of a dataset and return its first rows.
  #
  # Args:
  #   salary_year_df: the dataset to inspect.
  #   numberofRows: how many leading rows to return.
  #
  # Returns:
  #   head(salary_year_df, numberofRows). When called at top level the result
  #   is auto-printed, exactly as before.
  #
  # The row and column counts used to be computed and thrown away; they are
  # now emitted through message(), so the returned value and anything written
  # to standard output stay unchanged.
  message(
    "Rows: ", NROW(salary_year_df),
    ", columns: ", NCOL(salary_year_df)
  )
  head(salary_year_df, numberofRows)
}
# Function to get all occupations (majors)
getOccupationData <- function(salary_total_df) {
  # Builds the lookup table of major occupation groups: keeps rows whose
  # OCC_GROUP is "major", strips " Occupations" from the title, keeps the
  # first row per OCC_CODE (after ordering by code), sorts by title and
  # returns only the code/title columns.
  salary_total_df %>%
    filter(OCC_GROUP == "major") %>%
    mutate(TITLE = gsub(" Occupations", "", TITLE)) %>%
    arrange(OCC_CODE) %>%
    distinct(OCC_CODE, .keep_all = TRUE) %>%
    arrange(TITLE) %>%
    select(matches("OCC_CODE|TITLE|OCC_CODE_SHORT"))
}
# Function to get all occupation titles (not majors)
getTitleData <- function(salary_total_df) {
  # Builds the lookup table of occupation titles: keeps rows whose OCC_GROUP
  # is not "major", reduces them to the code/title columns, keeps the first
  # row per OCC_CODE and sorts the result by title.
  salary_total_df %>%
    filter(OCC_GROUP != "major") %>%
    select(matches("OCC_CODE|TITLE|OCC_CODE_SHORT")) %>%
    distinct(OCC_CODE, .keep_all = TRUE) %>%
    arrange(TITLE)
}
# Function to get year dataframe
getYearData <- function(salary_total_df) {
  # Returns one row per distinct YEAR with two identical columns, YEAR_KEY
  # and YEAR_VALUE.
  years <- distinct(salary_total_df, YEAR)
  names(years) <- "YEAR_KEY"
  years[["YEAR_VALUE"]] <- years[["YEAR_KEY"]]
  years
}
# Step 1: Read the data from different files
# One raw data frame per year, read from a comma-separated file with a header
# row. `header` is spelled out in full instead of relying on partial argument
# matching of `head`; text columns are kept as character, not factor.
salary_2007_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2007.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2008_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2008.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2009_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2009.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2010_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2010.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2011_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2011.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2012_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2012.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2013_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2013.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2014_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2014.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
salary_2015_df <- read.csv(file = "DATA/CSV/INPUT/SAL_2015.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
# Step 2: Clean all datasets
# Each yearly table is overwritten with its cleaned version: getCleanData()
# keeps the relevant columns, normalizes codes and titles, converts the
# employee and salary columns to numbers and sorts by salary (descending).
salary_2007_df <- getCleanData(salary_2007_df)
salary_2008_df <- getCleanData(salary_2008_df)
salary_2009_df <- getCleanData(salary_2009_df)
salary_2010_df <- getCleanData(salary_2010_df)
salary_2011_df <- getCleanData(salary_2011_df)
salary_2012_df <- getCleanData(salary_2012_df)
salary_2013_df <- getCleanData(salary_2013_df)
salary_2014_df <- getCleanData(salary_2014_df)
salary_2015_df <- getCleanData(salary_2015_df)
# Step 3: Check the contents of the cleaned dataframes
printDataFrameInfo(salary_2007_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 31030 192780 2007 29
2 29_1067 Surgeons 50260 191410 2007 29
3 29_1023 Orthodontists 5350 185340 2007 29
4 29_1064 Obstetricians And Gynecologists 21340 183600 2007 29
5 29_1022 Oral And Maxillofacial Surgeons 5040 178440 2007 29
6 29_1024 Prosthodontists 380 169360 2007 29
7 29_1063 Internists, General 46260 167270 2007 29
8 29_1069 Physicians And Surgeons, All Other 237400 155150 2007 29
9 29_1062 Family And General Practitioners 113250 153640 2007 29
10 11_1011 Chief Executives 299160 151370 2007 11
11 29_1066 Psychiatrists 21790 147620 2007 29
12 29_1021 Dentists, General 85260 147010 2007 29
13 29_1065 Pediatricians, General 28890 145210 2007 29
14 29_1029 Dentists, All Other Specialists 4490 120360 2007 29
15 29_1081 Podiatrists 9320 119790 2007 29
16 23_1011 Lawyers 555770 118280 2007 23
17 11_9041 Engineering Managers 184410 115610 2007 11
18 53_2011 Airline Pilots, Copilots, And Flight Engineers 78250 113940 2007 53
19 17_2171 Petroleum Engineers 16060 113890 2007 17
20 11_3021 Computer And Information Systems Managers 264990 113880 2007 11
printDataFrameInfo(salary_2008_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1067 Surgeons 47070 206770 2008 29
2 29_1061 Anesthesiologists 34230 197570 2008 29
3 29_1023 Orthodontists 5500 194930 2008 29
4 29_1064 Obstetricians And Gynecologists 19750 192780 2008 29
5 29_1022 Oral And Maxillofacial Surgeons 4760 190420 2008 29
6 29_1063 Internists, General 46980 176740 2008 29
7 29_1024 Prosthodontists 370 169810 2008 29
8 29_1069 Physicians And Surgeons, All Other 262850 165000 2008 29
9 29_1062 Family And General Practitioners 106210 161490 2008 29
10 11_1011 Chief Executives 301930 160440 2008 11
11 29_1021 Dentists, General 85910 154270 2008 29
12 29_1066 Psychiatrists 22140 154050 2008 29
13 29_1065 Pediatricians, General 29170 153370 2008 29
14 29_1029 Dentists, All Other Specialists 4770 142070 2008 29
15 29_1081 Podiatrists 9670 125760 2008 29
16 23_1011 Lawyers 553690 124750 2008 23
17 11_9121 Natural Sciences Managers 43060 123140 2008 11
18 11_9041 Engineering Managers 182300 120580 2008 11
19 53_2011 Airline Pilots, Copilots, And Flight Engineers 77090 119750 2008 53
20 17_2171 Petroleum Engineers 20880 119140 2008 17
printDataFrameInfo(salary_2009_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1067 Surgeons 44560 219770 2009 29
2 29_1061 Anesthesiologists 37450 211750 2009 29
3 29_1022 Oral And Maxillofacial Surgeons 5390 210710 2009 29
4 29_1023 Orthodontists 5410 206190 2009 29
5 29_1064 Obstetricians And Gynecologists 20380 204470 2009 29
6 29_1063 Internists, General 48270 183990 2009 29
7 29_1069 Physicians And Surgeons, All Other 274160 173860 2009 29
8 29_1062 Family And General Practitioners 99000 168550 2009 29
9 11_1011 Chief Executives 297640 167280 2009 11
10 29_1066 Psychiatrists 22210 163660 2009 29
11 29_1065 Pediatricians, General 29460 161410 2009 29
12 29_1021 Dentists, General 86270 156850 2009 29
13 29_1029 Dentists, All Other Specialists 5010 153570 2009 29
14 29_1081 Podiatrists 9720 131730 2009 29
15 23_1011 Lawyers 556790 129020 2009 23
16 11_9121 Natural Sciences Managers 44180 127000 2009 11
17 29_1024 Prosthodontists 660 125400 2009 29
18 11_9041 Engineering Managers 178110 122810 2009 11
19 11_3021 Computer And Information Systems Managers 287210 120640 2009 11
20 11_2021 Marketing Managers 169330 120070 2009 11
printDataFrameInfo(salary_2010_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1067 Surgeons 43230 225390 2010 29
2 29_1061 Anesthesiologists 34820 220100 2010 29
3 29_1022 Oral And Maxillofacial Surgeons 5330 214120 2010 29
4 29_1064 Obstetricians And Gynecologists 19940 210340 2010 29
5 29_1023 Orthodontists 5580 200290 2010 29
6 29_1063 Internists, General 50070 189480 2010 29
7 29_1069 Physicians And Surgeons, All Other 293740 180870 2010 29
8 29_1062 Family And General Practitioners 97820 173860 2010 29
9 11_1011 Chief Executives 273500 173350 2010 11
10 29_1066 Psychiatrists 22690 167610 2010 29
11 29_1065 Pediatricians, General 30100 165720 2010 29
12 29_1029 Dentists, All Other Specialists 5010 162190 2010 29
13 29_1021 Dentists, General 87700 158770 2010 29
14 29_1024 Prosthodontists 670 139620 2010 29
15 29_1081 Podiatrists 9310 133410 2010 29
16 23_1011 Lawyers 561350 129440 2010 23
17 11_9121 Natural Sciences Managers 45920 129320 2010 11
18 17_2171 Petroleum Engineers 28210 127970 2010 17
19 11_9041 Architectural And Engineering Managers 174720 125900 2010 11
20 11_3021 Computer And Information Systems Managers 288660 123280 2010 11
printDataFrameInfo(salary_2011_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 33310 234950 2011 29
2 29_1067 Surgeons 42340 231550 2011 29
3 29_1064 Obstetricians And Gynecologists 20540 218610 2011 29
4 29_1022 Oral And Maxillofacial Surgeons 5800 217380 2011 29
5 29_1023 Orthodontists 5040 204670 2011 29
6 29_1063 Internists, General 46740 189210 2011 29
7 29_1069 Physicians And Surgeons, All Other 305590 184650 2011 29
8 29_1062 Family And General Practitioners 101800 177330 2011 29
9 11_1011 Chief Executives 267370 176550 2011 11
10 29_1066 Psychiatrists 23140 174170 2011 29
11 29_1065 Pediatricians, General 29640 168650 2011 29
12 29_1029 Dentists, All Other Specialists 4850 168000 2011 29
13 29_1021 Dentists, General 90950 161750 2011 29
14 17_2171 Petroleum Engineers 30880 138980 2011 17
15 29_1081 Podiatrists 9210 133870 2011 29
16 29_1024 Prosthodontists 560 130820 2011 29
17 23_1011 Lawyers 570950 130490 2011 23
18 11_9041 Architectural And Engineering Managers 184530 129350 2011 11
19 11_9121 Natural Sciences Managers 47510 128230 2011 11
20 11_2021 Marketing Managers 168410 126190 2011 11
printDataFrameInfo(salary_2012_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 29930 232830 2012 29
2 29_1067 Surgeons 42410 230540 2012 29
3 29_1064 Obstetricians And Gynecologists 20880 216760 2012 29
4 29_1022 Oral And Maxillofacial Surgeons 4990 216440 2012 29
5 29_1063 Internists, General 45210 191520 2012 29
6 29_1023 Orthodontists 5530 186320 2012 29
7 29_1069 Physicians And Surgeons, All Other 308410 184820 2012 29
8 29_1062 Family And General Practitioners 110050 180850 2012 29
9 29_1066 Psychiatrists 24210 177520 2012 29
10 11_1011 Chief Executives 255940 176840 2012 11
11 29_1024 Prosthodontists 310 168120 2012 29
12 29_1065 Pediatricians, General 30560 167640 2012 29
13 29_1029 Dentists, All Other Specialists 5150 164780 2012 29
14 29_1021 Dentists, General 93580 163240 2012 29
15 29_1151 Nurse Anesthetists 34180 154390 2012 29
16 17_2171 Petroleum Engineers 36410 147470 2012 17
17 11_9041 Architectural And Engineering Managers 187640 133240 2012 11
18 29_1081 Podiatrists 9090 132470 2012 29
19 23_1011 Lawyers 581920 130880 2012 23
20 11_9121 Natural Sciences Managers 48560 130400 2012 11
printDataFrameInfo(salary_2013_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 30200 235070 2013 29
2 29_1067 Surgeons 41030 233150 2013 29
3 29_1022 Oral And Maxillofacial Surgeons 5280 218960 2013 29
4 29_1064 Obstetricians And Gynecologists 21730 212570 2013 29
5 29_1023 Orthodontists 5570 196270 2013 29
6 29_1063 Internists, General 46410 188440 2013 29
7 29_1069 Physicians And Surgeons, All Other 307220 187200 2013 29
8 29_1062 Family And General Practitioners 120860 183940 2013 29
9 29_1066 Psychiatrists 25040 182660 2013 29
10 11_1011 Chief Executives 248760 178400 2013 11
11 29_1065 Pediatricians, General 30890 170530 2013 29
12 29_1029 Dentists, All Other Specialists 5160 170340 2013 29
13 29_1021 Dentists, General 96000 164570 2013 29
14 29_1151 Nurse Anesthetists 35430 157690 2013 29
15 17_2171 Petroleum Engineers 34910 149180 2013 17
16 11_9041 Architectural And Engineering Managers 183430 136540 2013 11
17 29_1081 Podiatrists 8850 135070 2013 29
18 11_2021 Marketing Managers 174010 133700 2013 11
19 11_9121 Natural Sciences Managers 51900 132850 2013 11
20 11_3021 Computer And Information Systems Managers 319080 132570 2013 11
printDataFrameInfo(salary_2014_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 30060 246320 2014 29
2 29_1067 Surgeons 41070 240440 2014 29
3 29_1022 Oral And Maxillofacial Surgeons 5120 219600 2014 29
4 29_1064 Obstetricians And Gynecologists 21740 214750 2014 29
5 29_1023 Orthodontists 6190 201030 2014 29
6 29_1063 Internists, General 48390 190530 2014 29
7 29_1069 Physicians And Surgeons, All Other 311320 189760 2014 29
8 29_1062 Family And General Practitioners 124810 186320 2014 29
9 29_1066 Psychiatrists 25080 182700 2014 29
10 11_1011 Chief Executives 246240 180700 2014 11
11 29_1065 Pediatricians, General 31010 175400 2014 29
12 29_1029 Dentists, All Other Specialists 5450 168580 2014 29
13 29_1021 Dentists, General 97990 166810 2014 29
14 29_1151 Nurse Anesthetists 36590 158900 2014 29
15 17_2171 Petroleum Engineers 33740 147520 2014 17
16 29_1024 Prosthodontists 630 142830 2014 29
17 11_9041 Architectural And Engineering Managers 179320 138720 2014 11
18 29_1081 Podiatrists 8910 137480 2014 29
19 11_2021 Marketing Managers 184490 137400 2014 11
20 11_9121 Natural Sciences Managers 53290 136450 2014 11
printDataFrameInfo(salary_2015_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 29220 258100 2015 29
2 29_1067 Surgeons 41600 247520 2015 29
3 29_1022 Oral And Maxillofacial Surgeons 5000 233900 2015 29
4 29_1064 Obstetricians And Gynecologists 20090 222400 2015 29
5 29_1023 Orthodontists 5410 221390 2015 29
6 29_1069 Physicians And Surgeons, All Other 322740 197700 2015 29
7 29_1063 Internists, General 48920 196520 2015 29
8 29_1066 Psychiatrists 24060 193680 2015 29
9 29_1062 Family And General Practitioners 127430 192120 2015 29
10 11_1011 Chief Executives 238940 185850 2015 11
11 29_1065 Pediatricians, General 28660 183180 2015 29
12 29_1021 Dentists, General 100080 172350 2015 29
13 29_1029 Dentists, All Other Specialists 5550 171040 2015 29
14 29_1024 Prosthodontists 710 161020 2015 29
15 29_1151 Nurse Anesthetists 39410 160250 2015 29
16 17_2171 Petroleum Engineers 34600 149590 2015 17
17 11_9041 Architectural And Engineering Managers 179770 141650 2015 11
18 11_3021 Computer And Information Systems Managers 341250 141000 2015 11
19 11_2021 Marketing Managers 192890 140660 2015 11
20 11_9121 Natural Sciences Managers 53450 136570 2015 11
# Combine all dataframes
# Stack the nine cleaned yearly tables, in chronological order.
salary_total_df <- rbind(
  salary_2007_df, salary_2008_df, salary_2009_df,
  salary_2010_df, salary_2011_df, salary_2012_df,
  salary_2013_df, salary_2014_df, salary_2015_df
)
printDataFrameInfo(salary_total_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 29_1061 Anesthesiologists 31030 192780 2007 29
2 29_1067 Surgeons 50260 191410 2007 29
3 29_1023 Orthodontists 5350 185340 2007 29
4 29_1064 Obstetricians And Gynecologists 21340 183600 2007 29
5 29_1022 Oral And Maxillofacial Surgeons 5040 178440 2007 29
6 29_1024 Prosthodontists 380 169360 2007 29
7 29_1063 Internists, General 46260 167270 2007 29
8 29_1069 Physicians And Surgeons, All Other 237400 155150 2007 29
9 29_1062 Family And General Practitioners 113250 153640 2007 29
10 11_1011 Chief Executives 299160 151370 2007 11
11 29_1066 Psychiatrists 21790 147620 2007 29
12 29_1021 Dentists, General 85260 147010 2007 29
13 29_1065 Pediatricians, General 28890 145210 2007 29
14 29_1029 Dentists, All Other Specialists 4490 120360 2007 29
15 29_1081 Podiatrists 9320 119790 2007 29
16 23_1011 Lawyers 555770 118280 2007 23
17 11_9041 Engineering Managers 184410 115610 2007 11
18 53_2011 Airline Pilots, Copilots, And Flight Engineers 78250 113940 2007 53
19 17_2171 Petroleum Engineers 16060 113890 2007 17
20 11_3021 Computer And Information Systems Managers 264990 113880 2007 11
# Create salary major dataframe
# Rows of the combined table that describe major occupation groups.
salary_major_df <- salary_total_df %>%
  filter(OCC_GROUP == "major")
printDataFrameInfo(salary_major_df, 20)
OCC_CODE TITLE OCC_GROUP TOTAL_EMPLOYEES SALARY YEAR OCC_CODE_SHORT
1 11_0000 Management Occupations major 6003930 96150 2007 11
2 23_0000 Legal Occupations major 998590 88450 2007 23
3 15_0000 Computer And Mathematical Occupations major 3191360 72190 2007 15
4 17_0000 Architecture And Engineering Occupations major 2486020 68880 2007 17
5 29_0000 Healthcare Practitioners And Technical Occupations major 6877680 65020 2007 29
6 13_0000 Business And Financial Operations Occupations major 6015500 62410 2007 13
7 19_0000 Life, Physical, And Social Science Occupations major 1255670 62020 2007 19
8 27_0000 Arts, Design, Entertainment, Sports, And Media Occupations major 1761270 48410 2007 27
9 25_0000 Education, Training, And Library Occupations major 8316360 46610 2007 25
10 47_0000 Construction And Extraction Occupations major 6708200 40620 2007 47
11 21_0000 Community And Social Services Occupations major 1793040 40540 2007 21
12 49_0000 Installation, Maintenance, And Repair Occupations major 5390090 39930 2007 49
13 33_0000 Protective Service Occupations major 3087650 38750 2007 33
14 41_0000 Sales And Related Occupations major 14332020 35240 2007 41
15 51_0000 Production Occupations major 10146560 31310 2007 51
16 43_0000 Office And Administrative Support Occupations major 23270810 31200 2007 43
17 53_0000 Transportation And Material Moving Occupations major 9629030 30680 2007 53
18 31_0000 Healthcare Support Occupations major 3625240 25600 2007 31
19 39_0000 Personal Care And Service Occupations major 3339510 23980 2007 39
20 37_0000 Building And Grounds Cleaning And Maintenance Occupations major 4403900 23560 2007 37
# Create occupation dataframe
# Lookup table of major occupation groups, sorted by title.
occupation_df <- getOccupationData(salary_total_df)
printDataFrameInfo(occupation_df, 20)
OCC_CODE TITLE OCC_CODE_SHORT
1 17_0000 Architecture And Engineering 17
2 27_0000 Arts, Design, Entertainment, Sports, And Media 27
3 37_0000 Building And Grounds Cleaning And Maintenance 37
4 13_0000 Business And Financial Operations 13
5 21_0000 Community And Social Services 21
6 15_0000 Computer And Mathematical 15
7 47_0000 Construction And Extraction 47
8 25_0000 Education, Training, And Library 25
9 45_0000 Farming, Fishing, And Forestry 45
10 35_0000 Food Preparation And Serving Related 35
11 29_0000 Healthcare Practitioners And Technical 29
12 31_0000 Healthcare Support 31
13 49_0000 Installation, Maintenance, And Repair 49
14 23_0000 Legal 23
15 19_0000 Life, Physical, And Social Science 19
16 11_0000 Management 11
17 43_0000 Office And Administrative Support 43
18 39_0000 Personal Care And Service 39
19 51_0000 Production 51
20 33_0000 Protective Service 33
# Create title dataframe
# Lookup table of the non-major occupation titles, sorted by title.
title_df <- getTitleData(salary_total_df)
printDataFrameInfo(title_df, 20)
OCC_CODE TITLE OCC_CODE_SHORT
1 13_2011 Accountants And Auditors 13
2 27_2011 Actors 27
3 15_2011 Actuaries 15
4 23_1021 Administrative Law Judges, Adjudicators, And Hearing Officers 23
5 11_3011 Administrative Services Managers 11
6 25_3011 Adult Literacy, Remedial Education, And GED Teachers And Instructors 25
7 11_2011 Advertising And Promotions Managers 11
8 41_3011 Advertising Sales Agents 41
9 17_3021 Aerospace Engineering And Operations Technicians 17
10 17_2011 Aerospace Engineers 17
11 13_1011 Agents And Business Managers Of Artists, Performers, And Athletes 13
12 19_4011 Agricultural And Food Science Technicians 19
13 17_2021 Agricultural Engineers 17
14 45_2091 Agricultural Equipment Operators 45
15 45_2011 Agricultural Inspectors 45
16 25_1041 Agricultural Sciences Teachers, Postsecondary 25
17 45_2099 Agricultural Workers, All Other 45
18 53_2021 Air Traffic Controllers 53
19 53_1011 Aircraft Cargo Handling Supervisors 53
20 49_3011 Aircraft Mechanics And Service Technicians 49
# Create year dataframe
# Key/value table of the distinct years present in the combined data.
year_df <- getYearData(salary_total_df)
printDataFrameInfo(year_df, 20)
YEAR_KEY YEAR_VALUE
1 2007 2007
2 2008 2008
3 2009 2009
4 2010 2010
5 2011 2011
6 2012 2012
7 2013 2013
8 2014 2014
9 2015 2015
# Write all dataframes to CSV files
# Write short year datasets for homework #06 to CSVs
# Each yearly table is already sorted by salary (descending), so the first 20
# rows written here are the 20 highest-paying entries of that year.
write.csv(get20Lines(salary_2007_df), file = "DATA/CSV/OUTPUT/salary_2007.csv", row.names = FALSE)
write.csv(get20Lines(salary_2008_df), file = "DATA/CSV/OUTPUT/salary_2008.csv", row.names = FALSE)
write.csv(get20Lines(salary_2009_df), file = "DATA/CSV/OUTPUT/salary_2009.csv", row.names = FALSE)
write.csv(get20Lines(salary_2010_df), file = "DATA/CSV/OUTPUT/salary_2010.csv", row.names = FALSE)
write.csv(get20Lines(salary_2011_df), file = "DATA/CSV/OUTPUT/salary_2011.csv", row.names = FALSE)
write.csv(get20Lines(salary_2012_df), file = "DATA/CSV/OUTPUT/salary_2012.csv", row.names = FALSE)
write.csv(get20Lines(salary_2013_df), file = "DATA/CSV/OUTPUT/salary_2013.csv", row.names = FALSE)
write.csv(get20Lines(salary_2014_df), file = "DATA/CSV/OUTPUT/salary_2014.csv", row.names = FALSE)
write.csv(get20Lines(salary_2015_df), file = "DATA/CSV/OUTPUT/salary_2015.csv", row.names = FALSE)
# Write salary_total_df dataframe to CSV
write.csv(salary_total_df, file = "DATA/CSV/OUTPUT/salary_total_df.csv", row.names = FALSE)
# Write salary_major_df dataframe to CSV
write.csv(salary_major_df, file = "DATA/CSV/OUTPUT/salary_major_df.csv", row.names = FALSE)
# Write occupation_df dataframe to CSV
write.csv(occupation_df, file = "DATA/CSV/OUTPUT/occupation_df.csv", row.names = FALSE)
# Write title_df dataframe to CSV
write.csv(title_df, file = "DATA/CSV/OUTPUT/title_df.csv", row.names = FALSE)
# Write year_df dataframe to CSV
write.csv(year_df, file = "DATA/CSV/OUTPUT/year_df.csv", row.names = FALSE)