In this project I plan to create a public visualization of employment data by sector/industry and state over the years. My objective is to make it possible to compare different sectors over the years and to visualize the comparison.
I plan to build a Shiny app that lets the user select one or more sectors and compare them across one or more states.
I will also explore how a map can summarize the information across states for the selected sectors using a limited scale or quantiles (i.e., which state falls into which quantile of growth).
# Load the raw employment table (comma-separated, header row present,
# surrounding whitespace trimmed from every field). Column types are left to
# readr's guessing; the summary output below shows the year columns arriving
# as character.
EMP_DATA_FILE <- "EMP9818.csv"
EMP_DATA <- read_delim(
  file = EMP_DATA_FILE,
  delim = ",",
  col_names = TRUE,
  trim_ws = TRUE
)
# names(SAP_INV_DAT) = c("DROP","I_BILLDATE","I_ORG","I_DC","I_DOCCA","I_BILLTYPE","I_ORDER_REA","I_DIV","I_MATYPE","I_BILL_QTY","BLANK")
# SAP_INV_DAT<- SAP_INV_DAT[-which(is.na(SAP_INV_DAT$I_ORG)),] %>% .[-str_which(trimws(.$I_ORG),'SOrg.|-----'),c(-1,-11)]
#
# SAP_INV_DAT$I_BILLDATE <- date(parse_datetime(SAP_INV_DAT$I_BILLDATE, "%m/%d/%Y"))
#
# head(SAP_INV_DAT)
# Per-column overview of the raw table; the `##` lines that follow are the
# captured console output of this call.
summary(EMP_DATA)
## GeoFIPS GeoName Region TableName
## Length:7084 Length:7084 Min. :1.000 Length:7084
## Class :character Class :character 1st Qu.:3.000 Class :character
## Mode :character Mode :character Median :5.000 Mode :character
## Mean :4.475
## 3rd Qu.:6.000
## Max. :8.000
## NA's :122
## LineCode IndustryClassification Description Unit
## Min. : 10.0 Length:7084 Length:7084 Length:7084
## 1st Qu.: 517.0 Class :character Class :character Class :character
## Median : 712.5 Mode :character Mode :character Mode :character
## Mean : 872.1
## 3rd Qu.:1103.0
## Max. :2012.0
## NA's :4
## 1998 1999 2000 2001
## Length:7084 Length:7084 Length:7084 Length:7084
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2002 2003 2004 2005
## Length:7084 Length:7084 Length:7084 Length:7084
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2006 2007 2008 2009
## Length:7084 Length:7084 Length:7084 Length:7084
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2010 2011 2012 2013
## Length:7084 Length:7084 Length:7084 Length:7084
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2014 2015 2016 2017
## Length:7084 Length:7084 Length:7084 Length:7084
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## 2018
## Length:7084
## Class :character
## Mode :character
##
##
##
##
# Classification legend: MN = main | NN = range | NL = last.
# Only the "last" rows are needed here, i.e. rows whose IndustryClassification
# yields no second piece (`To` is NA) when split on non-alphanumeric
# characters. The main/range flag is therefore not computed.
#
# EMAP_CAT_LAST: LineCode + Description of the "last" rows, taken from the
# "United States" rows of EMP_DATA.
EMAP_CAT_LAST <- EMP_DATA %>%
  select(GeoName, LineCode, IndustryClassification, Description) %>%
  filter(GeoName == "United States") %>%
  # fill = "right" pads the missing `To` piece with NA, exactly as the default
  # does, but without emitting a warning for every single-piece value.
  separate(IndustryClassification, c("From", "To"), fill = "right") %>%
  filter(is.na(To)) %>%
  select(LineCode, Description)

# EMP_DAT_CAT: every EMAP_CAT_LAST row joined to the matching EMP_DATA rows
# (all GeoName values), keeping GeoName plus the yearly columns 1998-2018.
# Columns are selected by name rather than by position so that a reordered
# input file cannot silently pick the wrong fields.
EMP_DAT_CAT <- EMP_DATA %>%
  select(GeoName, LineCode, `1998`:`2018`) %>%
  left_join(EMAP_CAT_LAST, ., by = "LineCode")
# Read the line-code reference table (category and detailed description)
# from the XML definition file.
IND_DESC <- XML::xmlToDataFrame("SAEMP25N__definition.xml")
# The parsed table has three columns. Supplying only two names left the third
# column named NA, so all three are named explicitly.
# NOTE(review): "Definition" is chosen because the third column appears to
# hold the long definition text -- confirm against the XML file.
names(IND_DESC) <- c("LineCode", "Description", "Definition")
glimpse(EMP_DAT_CAT)
## Rows: 5,760
## Columns: 24
## $ LineCode <dbl> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, ...
## $ Description <chr> "Forestry and logging", "Forestry and logging", "Forest...
## $ GeoName <chr> "United States", "Alabama", "Alaska", "Arizona", "Arkan...
## $ `1998` <chr> "168100", "10193", "1397", "598", "7248", "7200", "698"...
## $ `1999` <chr> "168700", "9637", "1455", "(T)", "7266", "7273", "727",...
## $ `2000` <chr> "153300", "8852", "1307", "(T)", "6662", "6208", "693",...
## $ `2001` <chr> "148400", "(D)", "986", "(D)", "6087", "6139", "637", "...
## $ `2002` <chr> "144600", "8347", "858", "(D)", "(D)", "5917", "(D)", "...
## $ `2003` <chr> "133800", "7929", "800", "(D)", "5716", "5340", "(D)", ...
## $ `2004` <chr> "140100", "8222", "(D)", "363", "(D)", "5545", "(D)", "...
## $ `2005` <chr> "147300", "8187", "790", "(D)", "5949", "5776", "(D)", ...
## $ `2006` <chr> "142700", "8124", "623", "(D)", "(D)", "5610", "(D)", "...
## $ `2007` <chr> "138100", "7800", "520", "(D)", "(D)", "5656", "(D)", "...
## $ `2008` <chr> "136900", "7850", "460", "(D)", "5095", "5861", "(D)", ...
## $ `2009` <chr> "119800", "7056", "440", "318", "4408", "4841", "(D)", ...
## $ `2010` <chr> "111600", "6741", "(D)", "(D)", "4160", "4626", "579", ...
## $ `2011` <chr> "116800", "6962", "(D)", "298", "4229", "4991", "558", ...
## $ `2012` <chr> "134600", "7728", "505", "(D)", "(D)", "5603", "726", "...
## $ `2013` <chr> "138000", "7970", "501", "388", "(D)", "5566", "755", "...
## $ `2014` <chr> "133000", "7843", "525", "(D)", "(D)", "5256", "699", "...
## $ `2015` <chr> "141300", "8059", "534", "(D)", "(D)", "5564", "(D)", "...
## $ `2016` <chr> "138800", "7892", "503", "(D)", "4890", "5266", "795", ...
## $ `2017` <chr> "123900", "7107", "(D)", "(D)", "4405", "4975", "691", ...
## $ `2018` <chr> "117600", "6759", "401", "379", "4248", "4796", "641", ...
## Rows: 118
## Columns: 3
## $ LineCode <fct> 10, 20, 40, 50, 60, 70, 80, 90, 100, 101, 102, 103, 200...
## $ Description <fct> "Total employment (number of jobs)", "Wage and salary e...
## $ NA <fct> "A count of jobs, both full-time and part-time. It incl...
# # Other Range data
# EMP_DATA[,c(2,5,6,7)] %>% filter(GeoName=="United States") %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>% separate(IndustryClassification, c("From", "To")) %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="M")
#
# EMP_DATA %>% filter(GeoName=="United States") %>% .[,c(5,7)] %>% mutate(ISM = ifelse(IndustryClassification=="...","M","N")) %>% separate(IndustryClassification, c("From", "To")) %>% mutate(ISML = ifelse(is.na(To)==T,"L","N")) %>% filter(ISM=="N" & ISML=="N")