Proposal

Objective

In this project I am planning to create a public visualization using data relevant to a Employment data over year by sectors/idustry and state, my objective is to give an ability to comapre diffrenct sectors over the year and visualise it.

Proposed visualization

Ability to compare Sectors and growth per sector over different state by Year

I am planning build a shiny app which gives ability to select sector or more and compare them against one or more state.

I would see how I can use map to summaries the information across different state for the sectors in some limited scales or quantiles( i.e. which state falls in which quantiles of the growth)

Ability to check information of growth over the year for the give state and across state. I need to see how best I can display the yearly information in one view with exact information.

Analysis to find insights in the data

Which sector constantly grew over time, vs which sector didn’t grew.

About Employment Data

Data source

1/ The estimates of employment for 1998-2006 are based on the 2002 North American Industry Classification System (NAICS). The estimates for 2007-2010 are based on the 2007 NAICS. The estimates for 2011-2016 are based on the 2012 NAICS. The estimates for 2017 forward are based on the 2017 NAICS.
2/ Excludes limited partners.
3/ Under the 2007 NAICS, internet publishing and broadcasting was reclassified to other information services.
(D) Not shown to avoid disclosure of confidential information; estimates are included in higher-level totals.
(T) Estimate for employment suppressed to cover corresponding estimate for earnings. Estimates for this item are included in the total.
(NA) Not available.
(NM) Not meaningful.
Last updated: September 24, 2019– new statistics for 2018; revised statistics for 2014-2017.

Data Work

EMP_DATA_FILE <- "EMP9818.csv"
EMP_DATA <- read_delim(EMP_DATA_FILE,delim = ",",col_names = TRUE,trim_ws= TRUE)#,skip = 6,col_types = list(col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character(),col_character()))

# names(SAP_INV_DAT) = c("DROP","I_BILLDATE","I_ORG","I_DC","I_DOCCA","I_BILLTYPE","I_ORDER_REA","I_DIV","I_MATYPE","I_BILL_QTY","BLANK")
# SAP_INV_DAT<- SAP_INV_DAT[-which(is.na(SAP_INV_DAT$I_ORG)),] %>% .[-str_which(trimws(.$I_ORG),'SOrg.|-----'),c(-1,-11)]
# 
# SAP_INV_DAT$I_BILLDATE <- date(parse_datetime(SAP_INV_DAT$I_BILLDATE, "%m/%d/%Y"))
# 
# head(SAP_INV_DAT)

summary(EMP_DATA)

##    GeoFIPS            GeoName              Region       TableName        
##  Length:7084        Length:7084        Min.   :1.000   Length:7084       
##  Class :character   Class :character   1st Qu.:3.000   Class :character  
##  Mode  :character   Mode  :character   Median :5.000   Mode  :character  
##                                        Mean   :4.475                     
##                                        3rd Qu.:6.000                     
##                                        Max.   :8.000                     
##                                        NA's   :122                       
##     LineCode      IndustryClassification Description            Unit          
##  Min.   :  10.0   Length:7084            Length:7084        Length:7084       
##  1st Qu.: 517.0   Class :character       Class :character   Class :character  
##  Median : 712.5   Mode  :character       Mode  :character   Mode  :character  
##  Mean   : 872.1                                                               
##  3rd Qu.:1103.0                                                               
##  Max.   :2012.0                                                               
##  NA's   :4                                                                    
##      1998               1999               2000               2001          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2002               2003               2004               2005          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2006               2007               2008               2009          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2010               2011               2012               2013          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2014               2015               2016               2017          
##  Length:7084        Length:7084        Length:7084        Length:7084       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      2018          
##  Length:7084       
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##

# MN-MAIN  | NN - RANGE  | NL - LAST |
EMAP_CAT_LAST <- EMP_DATA[,c(2,5,6,7)] %>% filter(GeoName=="United States") %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISML=="L") %>%  select(.,LineCode,Description)

EMP_DAT_CAT <- EMP_DATA[,c(2,5,9:29)] %>% left_join(EMAP_CAT_LAST,.,by="LineCode")

# Reading the Category and deatail Desc for ref.
IND_DESC <- XML::xmlToDataFrame("SAEMP25N__definition.xml")
names(IND_DESC) <- c("LineCode","Description")

glimpse(EMP_DAT_CAT)

## Rows: 5,760
## Columns: 24
## $ LineCode    <dbl> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, ...
## $ Description <chr> "Forestry and logging", "Forestry and logging", "Forest...
## $ GeoName     <chr> "United States", "Alabama", "Alaska", "Arizona", "Arkan...
## $ `1998`      <chr> "168100", "10193", "1397", "598", "7248", "7200", "698"...
## $ `1999`      <chr> "168700", "9637", "1455", "(T)", "7266", "7273", "727",...
## $ `2000`      <chr> "153300", "8852", "1307", "(T)", "6662", "6208", "693",...
## $ `2001`      <chr> "148400", "(D)", "986", "(D)", "6087", "6139", "637", "...
## $ `2002`      <chr> "144600", "8347", "858", "(D)", "(D)", "5917", "(D)", "...
## $ `2003`      <chr> "133800", "7929", "800", "(D)", "5716", "5340", "(D)", ...
## $ `2004`      <chr> "140100", "8222", "(D)", "363", "(D)", "5545", "(D)", "...
## $ `2005`      <chr> "147300", "8187", "790", "(D)", "5949", "5776", "(D)", ...
## $ `2006`      <chr> "142700", "8124", "623", "(D)", "(D)", "5610", "(D)", "...
## $ `2007`      <chr> "138100", "7800", "520", "(D)", "(D)", "5656", "(D)", "...
## $ `2008`      <chr> "136900", "7850", "460", "(D)", "5095", "5861", "(D)", ...
## $ `2009`      <chr> "119800", "7056", "440", "318", "4408", "4841", "(D)", ...
## $ `2010`      <chr> "111600", "6741", "(D)", "(D)", "4160", "4626", "579", ...
## $ `2011`      <chr> "116800", "6962", "(D)", "298", "4229", "4991", "558", ...
## $ `2012`      <chr> "134600", "7728", "505", "(D)", "(D)", "5603", "726", "...
## $ `2013`      <chr> "138000", "7970", "501", "388", "(D)", "5566", "755", "...
## $ `2014`      <chr> "133000", "7843", "525", "(D)", "(D)", "5256", "699", "...
## $ `2015`      <chr> "141300", "8059", "534", "(D)", "(D)", "5564", "(D)", "...
## $ `2016`      <chr> "138800", "7892", "503", "(D)", "4890", "5266", "795", ...
## $ `2017`      <chr> "123900", "7107", "(D)", "(D)", "4405", "4975", "691", ...
## $ `2018`      <chr> "117600", "6759", "401", "379", "4248", "4796", "641", ...

head(EMP_DAT_CAT,130)

glimpse(IND_DESC)

## Rows: 118
## Columns: 3
## $ LineCode    <fct> 10, 20, 40, 50, 60, 70, 80, 90, 100, 101, 102, 103, 200...
## $ Description <fct> "Total employment (number of jobs)", "Wage and salary e...
## $ NA          <fct> "A count of jobs, both full-time and part-time. It incl...

head(IND_DESC,5)

# # Other Range data 
# EMP_DATA[,c(2,5,6,7)] %>% filter(GeoName=="United States") %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="M") 
# 
# EMP_DATA %>% filter(GeoName=="United States") %>% .[,c(5,7)] %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>%  separate(IndustryClassification, c("From", "To"))  %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="N" & ISML=="N")

All Data for States

temp_name <- names(EMP_DAT_CAT [,c(4:24)])
EMP_DAT_CAT_SAN <-  data.frame(lapply(EMP_DAT_CAT [,c(4:24)],function(x) {
                 gsub("(D)|(T)",NA,x)
              })) 

names(EMP_DAT_CAT_SAN) = temp_name

EMP_DAT_CAT_SAN <- cbind(EMP_DAT_CAT[,c(1:3)],EMP_DAT_CAT_SAN)

EMP_DAT_CAT_SAN

Sample Data for New York

EMP_DAT_CAT_SAN %>% filter(GeoName=="New York")