Objective

In this project I am planning to create a public visualization using data relevant to a Employment data over year by sectors/idustry and state, my objective is to give an ability to comapre diffrenct sectors over the year and visualise it.

Proposed visualization

  • Ability to compare Sectors and growth per sector over different state by Year

I am planning build a shiny app which gives ability to select sector or more and compare them against one or more state.

I would see how I can use map to summaries the information across different state for the sectors in some limited scales or quantiles( i.e. which state falls in which quantiles of the growth)

  • Ability to check information of growth over the year for the give state and across state. I need to see how best I can display the yearly information in one view with exact information.

Analysis to find insights in the data

  • Which sector constantly grew over time, vs which sector didn’t grew.

About Employment Data

Data source
  • 1/ The estimates of employment for 1998-2006 are based on the 2002 North American Industry Classification System (NAICS). The estimates for 2007-2010 are based on the 2007 NAICS. The estimates for 2011-2016 are based on the 2012 NAICS. The estimates for 2017 forward are based on the 2017 NAICS.
  • 2/ Excludes limited partners.
  • 3/ Under the 2007 NAICS, internet publishing and broadcasting was reclassified to other information services.
  • (D) Not shown to avoid disclosure of confidential information; estimates are included in higher-level totals.
  • (T) Estimate for employment suppressed to cover corresponding estimate for earnings. Estimates for this item are included in the total.
  • (NA) Not available.
  • (NM) Not meaningful.
  •  Last updated: September 24, 2019– new statistics for 2018; revised statistics for 2014-2017.

Data Work

EMP_DATA_FILE <- "EMP9818.csv"
EMP_DATA <- read_delim(EMP_DATA_FILE,delim = ",",col_names = TRUE,trim_ws= TRUE)#,skip = 6,col_types = list(col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character()))

# names(SAP_INV_DAT) = c("DROP","I_BILLDATE","I_ORG","I_DC","I_DOCCA","I_BILLTYPE","I_ORDER_REA","I_DIV","I_MATYPE","I_BILL_QTY","BLANK")
# SAP_INV_DAT<- SAP_INV_DAT[-which(is.na(SAP_INV_DAT$I_ORG)),] %>% .[-str_which(trimws(.$I_ORG),'SOrg.|-----'),c(-1,-11)]
# 
# SAP_INV_DAT$I_BILLDATE <- date(parse_datetime(SAP_INV_DAT$I_BILLDATE, "%m/%d/%Y"))
# 
# head(SAP_INV_DAT)

summary(EMP_DATA)
##    GeoFIPS            GeoName              Region       TableName        
##  Length:7084        Length:7084        Min.   :1.000   Length:7084       
##  Class :character   Class :character   1st Qu.:3.000   Class :character  
##  Mode  :character   Mode  :character   Median :5.000   Mode  :character  
##                                        Mean   :4.475                     
##                                        3rd Qu.:6.000                     
##                                        Max.   :8.000                     
##                                        NA's   :122                       
##     LineCode      IndustryClassification Description            Unit          
##  Min.   :  10.0   Length:7084            Length:7084        Length:7084       
##  1st Qu.: 517.0   Class :character       Class :character   Class :character  
##  Median : 712.5   Mode  :character       Mode  :character   Mode  :character  
##  Mean   : 872.1                                                               
##  3rd Qu.:1103.0                                                               
##  Max.   :2012.0                                                               
##  NA's   :4                                                                    
##      1998               1999               2000               2001          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2002               2003               2004               2005          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2006               2007               2008               2009          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2010               2011               2012               2013          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2014               2015               2016               2017          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2018          
##  Length:7084       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# MN-MAIN  | NN - RANGE  | NL - LAST |
EMAP_CAT_LAST <- EMP_DATA[,c(2,5,6,7)] %>% filter(GeoName=="United States") %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISML=="L") %>%  select(.,LineCode,Description)

EMP_DAT_CAT <- EMP_DATA[,c(2,5,9:29)] %>% left_join(EMAP_CAT_LAST,.,by="LineCode")

# Reading the Category and deatail Desc for ref.
IND_DESC <- XML::xmlToDataFrame("SAEMP25N__definition.xml")
names(IND_DESC) <- c("LineCode","Description")

glimpse(EMP_DAT_CAT)
## Rows: 5,760
## Columns: 24
## $ LineCode    <dbl> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, ...
## $ Description <chr> "Forestry and logging", "Forestry and logging", "Forest...
## $ GeoName     <chr> "United States", "Alabama", "Alaska", "Arizona", "Arkan...
## $ `1998`      <chr> "168100", "10193", "1397", "598", "7248", "7200", "698"...
## $ `1999`      <chr> "168700", "9637", "1455", "(T)", "7266", "7273", "727",...
## $ `2000`      <chr> "153300", "8852", "1307", "(T)", "6662", "6208", "693",...
## $ `2001`      <chr> "148400", "(D)", "986", "(D)", "6087", "6139", "637", "...
## $ `2002`      <chr> "144600", "8347", "858", "(D)", "(D)", "5917", "(D)", "...
## $ `2003`      <chr> "133800", "7929", "800", "(D)", "5716", "5340", "(D)", ...
## $ `2004`      <chr> "140100", "8222", "(D)", "363", "(D)", "5545", "(D)", "...
## $ `2005`      <chr> "147300", "8187", "790", "(D)", "5949", "5776", "(D)", ...
## $ `2006`      <chr> "142700", "8124", "623", "(D)", "(D)", "5610", "(D)", "...
## $ `2007`      <chr> "138100", "7800", "520", "(D)", "(D)", "5656", "(D)", "...
## $ `2008`      <chr> "136900", "7850", "460", "(D)", "5095", "5861", "(D)", ...
## $ `2009`      <chr> "119800", "7056", "440", "318", "4408", "4841", "(D)", ...
## $ `2010`      <chr> "111600", "6741", "(D)", "(D)", "4160", "4626", "579", ...
## $ `2011`      <chr> "116800", "6962", "(D)", "298", "4229", "4991", "558", ...
## $ `2012`      <chr> "134600", "7728", "505", "(D)", "(D)", "5603", "726", "...
## $ `2013`      <chr> "138000", "7970", "501", "388", "(D)", "5566", "755", "...
## $ `2014`      <chr> "133000", "7843", "525", "(D)", "(D)", "5256", "699", "...
## $ `2015`      <chr> "141300", "8059", "534", "(D)", "(D)", "5564", "(D)", "...
## $ `2016`      <chr> "138800", "7892", "503", "(D)", "4890", "5266", "795", ...
## $ `2017`      <chr> "123900", "7107", "(D)", "(D)", "4405", "4975", "691", ...
## $ `2018`      <chr> "117600", "6759", "401", "379", "4248", "4796", "641", ...
head(EMP_DAT_CAT,130)
glimpse(IND_DESC)
## Rows: 118
## Columns: 3
## $ LineCode    <fct> 10, 20, 40, 50, 60, 70, 80, 90, 100, 101, 102, 103, 200...
## $ Description <fct> "Total employment (number of jobs)", "Wage and salary e...
## $ NA          <fct> "A count of jobs, both full-time and part-time. It incl...
head(IND_DESC,5)
# # Other Range data 
# EMP_DATA[,c(2,5,6,7)] %>% filter(GeoName=="United States") %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="M") 
# 
# EMP_DATA %>% filter(GeoName=="United States") %>% .[,c(5,7)] %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="N" & ISML=="N") 

All Data for States

temp_name <- names(EMP_DAT_CAT [,c(4:24)])
EMP_DAT_CAT_SAN <-  data.frame(lapply(EMP_DAT_CAT [,c(4:24)],function(x) {
                 gsub("(D)|(T)",NA,x)
              })) 

names(EMP_DAT_CAT_SAN) = temp_name

EMP_DAT_CAT_SAN <- cbind(EMP_DAT_CAT[,c(1:3)],EMP_DAT_CAT_SAN)

EMP_DAT_CAT_SAN

Sample Data for New York

EMP_DAT_CAT_SAN %>% filter(GeoName=="New York")