** Use of visualization to conduct visual analytics on data analysis Provide answer and question to get the result of analysis Provide user-friendly interface to explore the data as well as find insights from it **
** Use shiny app provides efficient ways to manipulate and visualize data To explore salary data of 8 professions based on the data about prevailing wage of foreign employers in the United States. The 8 professions include: Data scientist, Software engineers, Data Analyst, Business Analyst, Management Consultant, Assistant Professor, Attorney, Teacher **
** The dataset is from the United States Department of Labor, Employment & Training Administration https://www.foreignlaborcert.doleta.gov/performancedata.cfm The filtered data for this application contains total 167,278 cases (in 17 columns) in 2015 It is about the prevailing wage data of foreign employers seeking to file applications in the Permanent Labor Certification Program prevailing wage data of US natives are not included **
library(knitr)
library(dplyr)
library(ggplot2)
library(plotly)
library(sqldf)
library(googleVis)
#x <- read.csv("salary.csv", header = TRUE, sep = ",", stringsAsFactors = FALSE)
load("salary.Rdata")
head(df)
## JOB_TITLE_SUBGROUP JOB_TITLE
## 1 software engineer SOFTWARE ENGINEER
## 2 assistant professor Assistant Professor of Marketing
## 3 teacher SPECIAL EDUCATION TEACHER
## 4 teacher SCIENCE TEACHER
## 5 software engineer SENIOR SOFTWARE ENGINEER
## 6 business analyst ORACLE SCM ANALYST/BUSINESS ANALYST
## PREVAILING_WAGE_SOC_TITLE
## 1 Software Developers, Applications
## 2 Business Teachers, Postsecondary
## 3 Special Education Teachers, Kindergarten and Eleme
## 4 Biological Science Teachers, Postsecondary
## 5 Software Developers, Systems Software
## 6 Computer Systems Analysts
## EMPLOYER_NAME PAID_WAGE_PER_YEAR
## 1 ADVANCED TECHNOLOGY GROUP USA, INC. 62171
## 2 SAN FRANCISCO STATE UNIVERSITY 91440
## 3 CAROUSEL SCHOOL 49470
## 4 HARLINGEN CONSOLIDATED INDEPENDENT SCHOOL DISTRICT 43800
## 5 SIGNAL SCIENCES CORPORATION 170000
## 6 CAPGEMINI U.S. LLC 114421
## PREVAILING_WAGE_PER_YEAR WORK_CITY WORK_STATE
## 1 NA BLOOMINGTON Illinois
## 2 NA SAN FRANCISCO California
## 3 NA LOS ANGELES California
## 4 NA HARLINGEN CISD Texas
## 5 NA PORTLAND Oregon
## 6 NA SOUTH SAN FRANCISCO California
## WORK_STATE_ABBREVIATION WORK_POSTAL_CODE FULL_TIME_POSITION_Y_N
## 1 IL <NA> y
## 2 CA 94132.0 <NA>
## 3 CA <NA> y
## 4 TX <NA> y
## 5 OR <NA> y
## 6 CA <NA> y
## EXPERIENCE_REQUIRED_Y_N EXPERIENCE_REQUIRED_NUM_MONTHS
## 1 <NA> NA
## 2 n NA
## 3 <NA> NA
## 4 <NA> NA
## 5 <NA> NA
## 6 <NA> NA
## EDUCATION_LEVEL_REQUIRED COLLEGE_MAJOR_REQUIRED VISA_CLASS
## 1 <NA> <NA> H-1B
## 2 Doctorate marketing greencard
## 3 <NA> <NA> H-1B
## 4 <NA> <NA> H-1B
## 5 <NA> <NA> E-3 Australian
## 6 <NA> <NA> H-1B
## COUNTRY_OF_CITIZENSHIP
## 1 <NA>
## 2 IRAN
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
arr <- df[,c(8,5)]
colnames(arr) <- c("region", "WAGE")
arr <- aggregate(arr[, 2], list(arr$region), mean)
colnames(arr) <- c("region", "WAGE")
arr$WAGE<-round(arr$WAGE)
salarylevel <- arr %>% arrange(desc(WAGE))
salarylevel
## region WAGE
## 1 West Virginia 109427
## 2 California 103571
## 3 Washington 102177
## 4 New York 91602
## 5 Arkansas 90271
## 6 Alabama 87326
## 7 Massachusetts 86611
## 8 Pennsylvania 83889
## 9 District of Columbia 81968
## 10 Mississippi 81951
## 11 Oregon 81531
## 12 Kansas 81032
## 13 Kentucky 80147
## 14 Connecticut 79579
## 15 Maryland 79154
## 16 Nevada 79152
## 17 Indiana 78723
## 18 Iowa 78219
## 19 Wisconsin 77729
## 20 New Hampshire 77435
## 21 Utah 77240
## 22 Illinois 77114
## 23 New Jersey 76371
## 24 Missouri 75495
## 25 Minnesota 75386
## 26 Colorado 75155
## 27 Virginia 74921
## 28 Alaska 74792
## 29 Ohio 74777
## 30 North Carolina 74667
## 31 Rhode Island 74114
## 32 Michigan 73813
## 33 Texas 72766
## 34 Nebraska 72600
## 35 Vermont 72542
## 36 Florida 72339
## 37 Georgia 72288
## 38 Delaware 71830
## 39 Hawaii 71223
## 40 Maine 71180
## 41 Arizona 70964
## 42 Tennessee 70047
## 43 Oklahoma 68444
## 44 Idaho 68074
## 45 North Dakota 67486
## 46 Louisiana 67124
## 47 Wyoming 66189
## 48 Montana 65991
## 49 South Dakota 61421
## 50 South Carolina 61376
## 51 Palau 60000
## 52 New Mexico 56642
## 53 Puerto Rico 53041
## 54 Guamam 48557
## 55 Virgin Islands 41972
## 56 Guam 39785
## 57 Northern Mariana Islands 18932
ordered_states <- rev(salarylevel$region)
salarylevel$region <- factor(salarylevel$region, levels = ordered_states)
ggplot(salarylevel, aes(x = region, y = WAGE)) + geom_bar(stat = "identity", alpha = 0.5, col = "blue") + coord_flip() + ggtitle("Wage level per State\n") + scale_y_continuous("Paid Wage") + scale_x_discrete("Region")
ggplot(df, aes(x=WORK_STATE_ABBREVIATION, y=PAID_WAGE_PER_YEAR, color=JOB_TITLE_SUBGROUP, shape=JOB_TITLE_SUBGROUP)) + geom_point(size=6, alpha=0.6)
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 113276 rows containing missing values (geom_point).
GoogleVis Plotly GGplot2 Sqldf
Plotly Knitr Dplyr Plyr MASS Reshape2 Ggplot2 Graphics Ggthemes GoogleVis etc