ShinySup

# Load the employment extract: comma-delimited, first row holds the column
# names, surrounding whitespace is trimmed from every field.
EMP_DATA_FILE <- "EMP9818.csv"
EMP_DATA <- read_delim(
  file = EMP_DATA_FILE,
  delim = ",",
  col_names = TRUE,
  trim_ws = TRUE
)

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   Region = col_double(),
##   LineCode = col_double()
## )

## See spec(...) for full column specifications.

## Warning: 4 parsing failures.
##  row col   expected    actual          file
## 7081  -- 29 columns 1 columns 'EMP9818.csv'
## 7082  -- 29 columns 1 columns 'EMP9818.csv'
## 7083  -- 29 columns 1 columns 'EMP9818.csv'
## 7084  -- 29 columns 1 columns 'EMP9818.csv'

# Build the list of "last" (most detailed) industry lines from the national
# rows. separate() splits IndustryClassification into From/To; rows where no
# second piece exists (To is NA) are the ones kept. Presumably these are the
# single-code classifications as opposed to code ranges - confirm against data.
EMAP_CAT_LAST <- EMP_DATA[, c(2, 5, 6, 7)] %>%
  filter(GeoName == "United States") %>%
  separate(IndustryClassification, c("From", "To")) %>%
  filter(is.na(To)) %>%
  select(LineCode, Description)

## Warning: Expected 2 pieces. Additional pieces discarded in 2 rows [23, 35].

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 96 rows [10, 11,
## 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 30, 33, ...].

# Attach the yearly figures of every region to the detailed industry lines.
# Columns 2, 5 and 9:29 of EMP_DATA are kept and matched on LineCode.
EMP_DAT_CAT <- left_join(
  EMAP_CAT_LAST,
  EMP_DATA[, c(2, 5, 9:29)],
  by = "LineCode"
)

# Read the category and detail descriptions for reference.
# NOTE(review): only two names are supplied here; the glimpse(IND_DESC) output
# later in this report shows a third column that ends up named NA.
IND_DESC <- setNames(
  XML::xmlToDataFrame("SAEMP25N__definition.xml"),
  c("LineCode", "Description")
)

# Inspect the structure of the joined employment table.
glimpse(EMP_DAT_CAT)

## Rows: 5,760
## Columns: 24
## $ LineCode    <dbl> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, ...
## $ Description <chr> "Forestry and logging", "Forestry and logging", "Forest...
## $ GeoName     <chr> "United States", "Alabama", "Alaska", "Arizona", "Arkan...
## $ `1998`      <chr> "168100", "10193", "1397", "598", "7248", "7200", "698"...
## $ `1999`      <chr> "168700", "9637", "1455", "(T)", "7266", "7273", "727",...
## $ `2000`      <chr> "153300", "8852", "1307", "(T)", "6662", "6208", "693",...
## $ `2001`      <chr> "148400", "(D)", "986", "(D)", "6087", "6139", "637", "...
## $ `2002`      <chr> "144600", "8347", "858", "(D)", "(D)", "5917", "(D)", "...
## $ `2003`      <chr> "133800", "7929", "800", "(D)", "5716", "5340", "(D)", ...
## $ `2004`      <chr> "140100", "8222", "(D)", "363", "(D)", "5545", "(D)", "...
## $ `2005`      <chr> "147300", "8187", "790", "(D)", "5949", "5776", "(D)", ...
## $ `2006`      <chr> "142700", "8124", "623", "(D)", "(D)", "5610", "(D)", "...
## $ `2007`      <chr> "138100", "7800", "520", "(D)", "(D)", "5656", "(D)", "...
## $ `2008`      <chr> "136900", "7850", "460", "(D)", "5095", "5861", "(D)", ...
## $ `2009`      <chr> "119800", "7056", "440", "318", "4408", "4841", "(D)", ...
## $ `2010`      <chr> "111600", "6741", "(D)", "(D)", "4160", "4626", "579", ...
## $ `2011`      <chr> "116800", "6962", "(D)", "298", "4229", "4991", "558", ...
## $ `2012`      <chr> "134600", "7728", "505", "(D)", "(D)", "5603", "726", "...
## $ `2013`      <chr> "138000", "7970", "501", "388", "(D)", "5566", "755", "...
## $ `2014`      <chr> "133000", "7843", "525", "(D)", "(D)", "5256", "699", "...
## $ `2015`      <chr> "141300", "8059", "534", "(D)", "(D)", "5564", "(D)", "...
## $ `2016`      <chr> "138800", "7892", "503", "(D)", "4890", "5266", "795", ...
## $ `2017`      <chr> "123900", "7107", "(D)", "(D)", "4405", "4975", "691", ...
## $ `2018`      <chr> "117600", "6759", "401", "379", "4248", "4796", "641", ...

# Preview the first 130 rows of the joined employment table.
head(EMP_DAT_CAT,130)

## Warning: `...` is not empty.
## 
## We detected these problematic arguments:
## * `needs_dots`
## 
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?

## # A tibble: 130 x 24
##    LineCode Description GeoName `1998` `1999` `2000` `2001` `2002` `2003` `2004`
##       <dbl> <chr>       <chr>   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr> 
##  1      101 Forestry a~ United~ 168100 168700 153300 148400 144600 133800 140100
##  2      101 Forestry a~ Alabama 10193  9637   8852   (D)    8347   7929   8222  
##  3      101 Forestry a~ Alaska  1397   1455   1307   986    858    800    (D)   
##  4      101 Forestry a~ Arizona 598    (T)    (T)    (D)    (D)    (D)    363   
##  5      101 Forestry a~ Arkans~ 7248   7266   6662   6087   (D)    5716   (D)   
##  6      101 Forestry a~ Califo~ 7200   7273   6208   6139   5917   5340   5545  
##  7      101 Forestry a~ Colora~ 698    727    693    637    (D)    (D)    (D)   
##  8      101 Forestry a~ Connec~ (T)    531    452    433    431    384    419   
##  9      101 Forestry a~ Delawa~ (T)    (T)    (T)    56     (D)    29     (D)   
## 10      101 Forestry a~ Distri~ (T)    (T)    (T)    (D)    (D)    5      5     
## # ... with 120 more rows, and 14 more variables: `2005` <chr>, `2006` <chr>,
## #   `2007` <chr>, `2008` <chr>, `2009` <chr>, `2010` <chr>, `2011` <chr>,
## #   `2012` <chr>, `2013` <chr>, `2014` <chr>, `2015` <chr>, `2016` <chr>,
## #   `2017` <chr>, `2018` <chr>

# Inspect the structure of the industry description lookup.
glimpse(IND_DESC)

## Rows: 118
## Columns: 3
## $ LineCode    <fct> 10, 20, 40, 50, 60, 70, 80, 90, 100, 101, 102, 103, 200...
## $ Description <fct> "Total employment (number of jobs)", "Wage and salary e...
## $ NA          <fct> "A count of jobs, both full-time and part-time. It incl...

# Preview the first five rows of the industry description lookup.
head(IND_DESC,5)

##   LineCode                       Description
## 1       10 Total employment (number of jobs)
## 2       20        Wage and salary employment
## 3       40            Proprietors employment
## 4       50       Farm proprietors employment
## 5       60    Nonfarm proprietors employment
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         NA
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    A count of jobs, both full-time and part-time. It includes wage and salary jobs, sole proprietorships, and individual general partners, but not unpaid family workers nor volunteers.
## 2 Wage and salary employment, also referred to as wage and salary jobs, measures the average annual number of full-time and part-time jobs in each area by place of work. All jobs for which wages and salaries are paid are counted. Although compensation paid to jurors, expert legal witnesses, prisoners, and justices of the peace (for marriage fees), is counted in wages and salaries, these activities are not counted as jobs in wage and salary employment. Corporate directorships are counted as self-employment. The following description of the sources and methods used in estimating wage and salary employment is divided into two sections: Employment in industries covered by unemployment insurance (UI) programs, and employment in industries not covered by UI.
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Consists of farm proprietors employment and nonfarm proprietors employment.
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Consists of sole proprietors and non-corporate partners in the farm industry.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Consists of the number of nonfarm sole proprietorships and the number of individual general partners in nonfarm partnerships.

# Sanitise the yearly columns (positions 4:24): the "(D)" and "(T)" markers are
# blanked to NA, then every NA in the table is replaced by 0.
#
# The markers are matched literally. Used as a regular expression, "(D)|(T)"
# treats the parentheses as groups and therefore hits any value that merely
# contains a "D" or a "T".
temp_name <- names(EMP_DAT_CAT[, c(4:24)])
EMP_DAT_CAT_SAN <- as.data.frame(EMP_DAT_CAT[, 1:24], stringsAsFactors = FALSE)
EMP_DAT_CAT_SAN[temp_name] <- lapply(EMP_DAT_CAT_SAN[temp_name], function(x) {
  # Stay character so the 0 fill below can never hit an invalid factor level
  # (data.frame() turns strings into factors by default before R 4.0).
  x <- as.character(x)
  x[x %in% c("(D)", "(T)")] <- NA_character_
  x
})

# NAs anywhere in the table (not only in the yearly columns) become 0.
EMP_DAT_CAT_SAN[is.na(EMP_DAT_CAT_SAN)] <- 0

# Persist the sanitised table and load it back from disk.
saveRDS(EMP_DAT_CAT_SAN, file = "EMP_DAT_CAT_SAN")
EMP_DAT_CAT_SAN <- readRDS(file = "EMP_DAT_CAT_SAN")

# Reshape to long form: the first three columns stay as identifiers and every
# remaining column is stacked into a (Year, Value) pair.
Long_EMP_DAT_CAT_SAN <- gather(
  EMP_DAT_CAT_SAN,
  key = "Year",
  value = "Value",
  -c(1, 2, 3)
)

## Warning: attributes are not identical across measure variables;
## they will be dropped

# Yearly job counts for New York, California and Texas as dodged bars, one
# colour per state. The labels describe what is actually plotted: Value is a
# job count, not a sector, and three states are shown.
# NOTE(review): no Description filter is applied, so every industry line of a
# state is drawn at the same position - confirm this is intended.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(GeoName %in% c("New York", "California", "Texas")) %>%
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), fill = GeoName)) +
  geom_col(position = position_dodge()) +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "No. of Jobs by Year in New York, California and Texas",
    y = "No. of Jobs", x = "Years"
  )

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning: Removed 36 rows containing missing values (geom_col).

# Yearly job counts for the "Health and personal care stores" line in New York,
# California and Texas as dodged bars, one colour per state. The title and
# y-axis label name the plotted industry and measure.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    GeoName %in% c("New York", "California", "Texas"),
    Description %in% c("Health and personal care stores")
  ) %>%
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), fill = GeoName)) +
  geom_col(position = position_dodge()) +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "Health and personal care stores: No. of Jobs in NY, CA and TX",
    y = "No. of Jobs", x = "Years"
  )

# Linear trend (geom_smooth with method "lm") of yearly job counts for the
# "Health and personal care stores" line in New York, California and Texas,
# one fitted line per state. The y axis carries job counts, so it is labelled
# accordingly.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    GeoName %in% c("New York", "California", "Texas"),
    Description %in% c("Health and personal care stores")
  ) %>%
  ggplot(
    mapping = aes(
      x = Year, y = as.numeric(Value),
      group = GeoName, col = GeoName, fill = GeoName
    )
  ) +
  geom_smooth(method = "lm") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "Health and personal care stores Data for No. Of Jobs ...",
    y = "No. of Jobs", x = "Years"
  )

## `geom_smooth()` using formula 'y ~ x'

# Linear trend (geom_smooth with method "lm") of yearly job counts for the
# "Health and personal care stores" line, one fitted line per region; only the
# national "United States" rows are excluded. The y axis carries job counts,
# so it is labelled accordingly.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    !GeoName %in% c("United States"),
    Description %in% c("Health and personal care stores")
  ) %>%
  ggplot(
    mapping = aes(
      x = Year, y = as.numeric(Value),
      group = GeoName, col = GeoName, fill = GeoName
    )
  ) +
  geom_smooth(method = "lm") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "Health and personal care stores Data for No. Of Jobs ...",
    y = "No. of Jobs", x = "Years"
  )

## `geom_smooth()` using formula 'y ~ x'

# Long_EMP_DAT_CAT_SAN %>% select("Description","Year","Value","GeoName") %>% filter(!GeoName %in% c("United States"), Description %in% c("Health and personal care stores")) %>% group_by(Year) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg %>%mean()

# Long_EMP_DAT_CAT_SAN %>% select("Description","Year","Value","GeoName") %>% 
#                 filter(GeoName %in% c("New York"), Description %in%  c("Health and personal care stores"))
# %>%                 group_by(GeoName) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg



# Yearly job counts for the "Health and personal care stores" line in New York.
# The mapping uses `x = Year` (it previously read `x-Year`, i.e. a subtraction)
# and the series is drawn as a line with points; a bare geom_abline() draws a
# fixed reference line and never looks at the data.
# `group = 1` joins the points into one line although Year is a discrete axis.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    GeoName %in% c("New York"),
    Description %in% c("Health and personal care stores")
  ) %>%
  select(Year, Value) %>%
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), group = 1)) +
  geom_line() +
  geom_point()

# group_by(GeoName) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg

# State shapefiles at the lowest resolution, loaded from the cached RDS file.

states <- readRDS("state.rds")

# Replace every remaining NA cell with 0.
# is.na() on a data frame returns a logical matrix; which() of that matrix
# yields linear cell positions, which must not be used as row numbers (they
# address the wrong rows and can point past the last row). The matrix itself
# is used as a cell mask instead.
Long_EMP_DAT_CAT_SAN[is.na(Long_EMP_DAT_CAT_SAN)] <- 0

# 1998 rows of the "Health and personal care stores" line for every region
# except the national total. The lookup built from `states` adds the STUSPS
# code as `state`; rows without a match are dropped, and so is column 4
# (GeoName).
Long_EMP_DAT_CAT_SAN1 <- Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    !GeoName %in% c("United States"),
    Year == "1998",
    Description %in% c("Health and personal care stores")
  ) %>%
  left_join(
    data.frame(GeoName = states$NAME, state = states$STUSPS),
    by = "GeoName"
  ) %>%
  .[!is.na(.$state), -4]

## Warning: Column `GeoName` joining character vector and factor, coercing into
## character vector

# Merge the 1998 figures onto the state shapes: STUSPS on the spatial side is
# matched with `state` on the data side.
Long_EMP_DAT_CAT_SAN1 <- geo_join(
  spatial_data = states,
  data_frame = Long_EMP_DAT_CAT_SAN1,
  by_sp = "STUSPS",
  by_df = "state"
)

## Warning: Column `STUSPS`/`state` joining character vector and factor, coercing
## into character vector

# Keep only the shapes that received a value in the join.
# A logical mask is used because `x[-which(cond), ]` returns zero rows whenever
# no element satisfies `cond` (negating an empty index selects nothing).
Long_EMP_DAT_CAT_SAN1 <- Long_EMP_DAT_CAT_SAN1[
  !is.na(Long_EMP_DAT_CAT_SAN1$Value),
]
# data.frame(states$NAME,states$STUSPS)

# The colour palette used by the map is created with colorBin() just before the
# map is drawn, so no palette is defined at this point.

# Setting up the pop up text: state name, then its value on a second line.
# The pieces are passed to paste0() directly; wrapped in as.character() only
# the first argument is converted and the value never reached the popup.
# Popups are rendered as HTML, hence "<br/>" for the line break.
popup_sb <- paste0(
  "Total: ", Long_EMP_DAT_CAT_SAN1$NAME,
  "<br/>", Long_EMP_DAT_CAT_SAN1$Value
)

library(rbin)

# summary() of the numeric values; the palette below does not use it and takes
# a fixed bin count instead.
bins <- summary(as.numeric(Long_EMP_DAT_CAT_SAN1$Value))

# Binned "YlOrRd" palette over the values, with grey for missing values.
pal <- colorBin(
  palette = "YlOrRd",
  domain = as.numeric(Long_EMP_DAT_CAT_SAN1$Value),
  bins = 10,
  pretty = TRUE,
  na.color = "#808080",
  alpha = FALSE,
  reverse = FALSE,
  right = FALSE
)
# pal <- colorBin("YlOrRd", domain = (as.numeric(Long_EMP_DAT_CAT_SAN1$Value)),bins=bins)

# Store Value as numeric so the map formulas can use it directly.
Long_EMP_DAT_CAT_SAN1$Value <- as.numeric(Long_EMP_DAT_CAT_SAN1$Value)

# Choropleth of the 1998 "Health and personal care stores" values per state:
# polygons are filled through `pal`, clicking one opens the matching popup_sb
# entry, and the legend sits bottom-right. The legend title names the mapped
# data (the previous "Starbucks" title was unrelated to it).
Long_EMP_DAT_CAT_SAN1 %>%
  leaflet() %>%
  addTiles() %>%
  setView(-98.483330, 38.712046, zoom = 4) %>%
  addPolygons(
    color = "#444444", weight = 1, smoothFactor = 0.5,
    opacity = 1.0, fillOpacity = 0.5,
    fillColor = ~pal(Value),
    popup = ~popup_sb,
    highlightOptions = highlightOptions(
      color = "white", weight = 2, bringToFront = TRUE
    )
  ) %>%
  addLegend(
    pal = pal,
    values = as.numeric(Long_EMP_DAT_CAT_SAN1$Value),
    position = "bottomright",
    title = "Health and personal care stores (1998)"
  )

## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'

## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette YlOrRd is 9
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette YlOrRd is 9
## Returning the palette you asked for with that many colors

# write_rds(Long_EMP_DAT_CAT_SAN,"Long_EMP_DAT_CAT_SAN")
# EMP_DAT_CAT_SAN

# Industry line analysed from here on.
industry <- "Health and personal care stores"

# All years of that industry for every region except the national total. The
# lookup built from `states` adds the STUSPS code as `state`; rows without a
# match are dropped, and so is column 4 (GeoName).
Long_EMP_DAT_CAT_SAN2 <- Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    !GeoName %in% c("United States"),
    Description %in% c(industry)
  ) %>%
  left_join(
    data.frame(GeoName = states$NAME, state = states$STUSPS),
    by = "GeoName"
  ) %>%
  .[!is.na(.$state), -4]

## Warning: Column `GeoName` joining character vector and factor, coercing into
## character vector

# Make sure Value is numeric before averaging.
Long_EMP_DAT_CAT_SAN2$Value <- as.numeric(Long_EMP_DAT_CAT_SAN2$Value)


# Industry average per state (mean of Value over all rows of the state), then
# merged onto the state shapes for the map layer.
m_data_indus <- summarise(
  group_by(Long_EMP_DAT_CAT_SAN2, state),
  Value = mean(Value)
)
m_data_indus <- geo_join(
  spatial_data = states,
  data_frame = m_data_indus,
  by_sp = "STUSPS",
  by_df = "state"
)

## Warning: Column `STUSPS`/`state` joining character vector and factor, coercing
## into character vector

# Keep only the shapes that received an average in the join.
# A logical mask is used because `x[-which(cond), ]` returns zero rows whenever
# no element satisfies `cond` (negating an empty index selects nothing).
m_data_indus <- m_data_indus[!is.na(m_data_indus$Value), ]

# Long_EMP_DAT_CAT_SAN1$Year %>% unique()
# # unique(Long_EMP_DAT_CAT_SAN1$Description)
# EMP_DAT_CAT_SAN[is.na(EMP_DAT_CAT_SAN)] <- 0
# Long_EMP_DAT_CAT_SAN2[is.na(Long_EMP_DAT_CAT_SAN2),]
# Long_EMP_DAT_CAT_SAN2 <- Long_EMP_DAT_CAT_SAN2[-which(is.na(Long_EMP_DAT_CAT_SAN2$Value)),]
# Long_EMP_DAT_CAT_SAN2$Value <- as.numeric(Long_EMP_DAT_CAT_SAN2$Value)
# Long_EMP_DAT_CAT_SAN2$Description

# Inspect the structure of the per-state average joined to the state shapes.
glimpse(m_data_indus)

## Rows: 51
## Columns: 12
## $ STATEFP  <chr> "12", "30", "27", "24", "45", "23", "15", "11", "44", "31"...
## $ STATENS  <chr> "00294478", "00767982", "00662849", "01714934", "01779799"...
## $ AFFGEOID <chr> "0400000US12", "0400000US30", "0400000US27", "0400000US24"...
## $ GEOID    <chr> "12", "30", "27", "24", "45", "23", "15", "11", "44", "31"...
## $ STUSPS   <chr> "FL", "MT", "MN", "MD", "SC", "ME", "HI", "DC", "RI", "NE"...
## $ NAME     <chr> "Florida", "Montana", "Minnesota", "Maryland", "South Caro...
## $ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00"...
## $ ALAND    <dbl> 138947364717, 376966832749, 206230065476, 25151726296, 778...
## $ AWATER   <dbl> 31362872853, 3869031338, 18942261495, 6979340970, 50758745...
## $ Value    <dbl> 80545.190, 2685.143, 18439.238, 22093.143, 17153.286, 4214...
## $ rank     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-80.17628 2..., MULTIPOLYGON ...

# Yearly average of Value over all states other than NY, tagged "REST" so it
# carries the `state` column the plot groups by.
restofUS <- Long_EMP_DAT_CAT_SAN2 %>%
  filter(state != "NY") %>%
  group_by(Year) %>%
  summarise(Value = mean(Value))
restofUS$state <- "REST"

# NY series (black line and points) against the other-state average (red
# points). Title and y-axis label name the plotted industry and measure.
Long_EMP_DAT_CAT_SAN2 %>%
  filter(state == "NY") %>%
  ggplot(mapping = aes(x = Year, y = Value, group = state)) +
  geom_line() +
  geom_point() +
  geom_point(data = restofUS, color = "red") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "gray", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = paste0(industry, ": No. of Jobs in NY vs. other-state average"),
    y = "No. of Jobs", x = "Years"
  )

# Yearly average of Value over all states other than NY, tagged "REST" so it
# carries the `state` column the plot groups by.
restofUS <- Long_EMP_DAT_CAT_SAN2 %>%
  filter(state != "NY") %>%
  group_by(Year) %>%
  summarise(Value = mean(Value))
restofUS$state <- "REST"

# NY series (black line and points) against the other-state average (red
# points). Title and y-axis label name the plotted industry and measure.
Long_EMP_DAT_CAT_SAN2 %>%
  filter(state == "NY") %>%
  ggplot(mapping = aes(x = Year, y = Value, group = state)) +
  geom_line() +
  geom_point() +
  geom_point(data = restofUS, color = "red") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "gray", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = paste0(industry, ": No. of Jobs in NY vs. other-state average"),
    y = "No. of Jobs", x = "Years"
  )

# Setting up the pop up text: state name, then its formatted average on a
# second line. Popups are rendered as HTML, where "\n" does not break the line,
# so "<br/>" is used as the separator.
popup_sb <- paste0(
  "Total: ", m_data_indus$NAME, "<br/>",
  format(m_data_indus$Value, nsmall = 3, digits = 3)
)

library(rbin)

# Bin edges for the palette are the values returned by summary().
# NOTE(review): summary() also returns the mean, so it becomes a bin edge next
# to the quartiles - confirm this is intended.
bins <- summary(m_data_indus$Value)

# Binned "YlOrRd" palette over the per-state averages, grey for missing values.
pal <- colorBin(
  palette = "YlOrRd",
  domain = m_data_indus$Value,
  bins = bins,
  pretty = TRUE,
  na.color = "#808080",
  alpha = FALSE,
  reverse = FALSE,
  right = FALSE
)
# pal <- colorBin("YlOrRd", domain = (as.numeric(Long_EMP_DAT_CAT_SAN1$Value)),bins=bins)

# Long_EMP_DAT_CAT_SAN1$Value <- as.numeric(Long_EMP_DAT_CAT_SAN1$Value)

# Choropleth of the per-state industry average: polygons are filled through
# `pal`, clicking one opens the matching popup_sb entry, and the legend (titled
# with the industry name) sits bottom-right.
m_data_indus %>%
  leaflet() %>%
  addTiles() %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    color = "#444444",
    weight = 1,
    smoothFactor = 0.5,
    opacity = 1.0,
    fillOpacity = 0.5,
    fillColor = ~pal(Value),
    popup = ~popup_sb,
    highlightOptions = highlightOptions(
      color = "white",
      weight = 2,
      bringToFront = TRUE
    )
  ) %>%
  addLegend(
    pal = pal,
    values = m_data_indus$Value,
    position = "bottomright",
    title = industry
  )

## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'

ShinySup

Rajwant Mishra

November 12, 2020