# Relative path of the employment CSV extract.
EMP_DATA_FILE <- "EMP9818.csv"
# Load the comma-delimited file: the first row supplies the column names and
# surrounding whitespace is trimmed from every field.
EMP_DATA <- read_delim(
  file = EMP_DATA_FILE,
  delim = ",",
  col_names = TRUE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## Region = col_double(),
## LineCode = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 4 parsing failures.
## row col expected actual file
## 7081 -- 29 columns 1 columns 'EMP9818.csv'
## 7082 -- 29 columns 1 columns 'EMP9818.csv'
## 7083 -- 29 columns 1 columns 'EMP9818.csv'
## 7084 -- 29 columns 1 columns 'EMP9818.csv'
# MN-MAIN | NN - RANGE | NL - LAST |
# Lookup of "last"-level industry lines (LineCode + Description), taken from
# the national ("United States") rows only. IndustryClassification is split
# into a "From" and a "To" piece; rows that yield no "To" piece (a single
# code rather than a range) are kept as last-level lines.
# NOTE(review): rows whose classification is the "..." placeholder presumably
# split into two empty pieces and are therefore dropped here - confirm.
EMAP_CAT_LAST <- EMP_DATA[, c(2, 5, 6, 7)] %>%
  filter(GeoName == "United States") %>%
  # `extra = "drop"` / `fill = "right"` spell out what separate() already does
  # by default, without the "Expected 2 pieces" warnings.
  separate(
    IndustryClassification,
    c("From", "To"),
    extra = "drop",
    fill = "right"
  ) %>%
  # Equivalent to the former ISML == "L" flag; the ISM flag was never used
  # after being computed, so it is no longer created.
  filter(is.na(To)) %>%
  select(LineCode, Description)
## Warning: Expected 2 pieces. Additional pieces discarded in 2 rows [23, 35].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 96 rows [10, 11,
## 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 30, 33, ...].
# Attach the per-area yearly figures (columns 2, 5 and 9:29 of EMP_DATA) to
# every last-level industry line, keeping all rows of EMAP_CAT_LAST.
EMP_DAT_CAT <- left_join(
  x = EMAP_CAT_LAST,
  y = EMP_DATA[, c(2, 5, 9:29)],
  by = "LineCode"
)
# Read the category and detailed description table for reference.
IND_DESC <- XML::xmlToDataFrame("SAEMP25N__definition.xml")
# The XML yields three columns; name all of them so the long definition text
# does not end up in a column literally called NA.
names(IND_DESC) <- c("LineCode", "Description", "Definition")
glimpse(EMP_DAT_CAT)
## Rows: 5,760
## Columns: 24
## $ LineCode <dbl> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, ...
## $ Description <chr> "Forestry and logging", "Forestry and logging", "Forest...
## $ GeoName <chr> "United States", "Alabama", "Alaska", "Arizona", "Arkan...
## $ `1998` <chr> "168100", "10193", "1397", "598", "7248", "7200", "698"...
## $ `1999` <chr> "168700", "9637", "1455", "(T)", "7266", "7273", "727",...
## $ `2000` <chr> "153300", "8852", "1307", "(T)", "6662", "6208", "693",...
## $ `2001` <chr> "148400", "(D)", "986", "(D)", "6087", "6139", "637", "...
## $ `2002` <chr> "144600", "8347", "858", "(D)", "(D)", "5917", "(D)", "...
## $ `2003` <chr> "133800", "7929", "800", "(D)", "5716", "5340", "(D)", ...
## $ `2004` <chr> "140100", "8222", "(D)", "363", "(D)", "5545", "(D)", "...
## $ `2005` <chr> "147300", "8187", "790", "(D)", "5949", "5776", "(D)", ...
## $ `2006` <chr> "142700", "8124", "623", "(D)", "(D)", "5610", "(D)", "...
## $ `2007` <chr> "138100", "7800", "520", "(D)", "(D)", "5656", "(D)", "...
## $ `2008` <chr> "136900", "7850", "460", "(D)", "5095", "5861", "(D)", ...
## $ `2009` <chr> "119800", "7056", "440", "318", "4408", "4841", "(D)", ...
## $ `2010` <chr> "111600", "6741", "(D)", "(D)", "4160", "4626", "579", ...
## $ `2011` <chr> "116800", "6962", "(D)", "298", "4229", "4991", "558", ...
## $ `2012` <chr> "134600", "7728", "505", "(D)", "(D)", "5603", "726", "...
## $ `2013` <chr> "138000", "7970", "501", "388", "(D)", "5566", "755", "...
## $ `2014` <chr> "133000", "7843", "525", "(D)", "(D)", "5256", "699", "...
## $ `2015` <chr> "141300", "8059", "534", "(D)", "(D)", "5564", "(D)", "...
## $ `2016` <chr> "138800", "7892", "503", "(D)", "4890", "5266", "795", ...
## $ `2017` <chr> "123900", "7107", "(D)", "(D)", "4405", "4975", "691", ...
## $ `2018` <chr> "117600", "6759", "401", "379", "4248", "4796", "641", ...
# Preview the first 130 rows of the joined table.
head(EMP_DAT_CAT, n = 130)
## Warning: `...` is not empty.
##
## We detected these problematic arguments:
## * `needs_dots`
##
## These dots only exist to allow future extensions and should be empty.
## Did you misspecify an argument?
## # A tibble: 130 x 24
## LineCode Description GeoName `1998` `1999` `2000` `2001` `2002` `2003` `2004`
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 101 Forestry a~ United~ 168100 168700 153300 148400 144600 133800 140100
## 2 101 Forestry a~ Alabama 10193 9637 8852 (D) 8347 7929 8222
## 3 101 Forestry a~ Alaska 1397 1455 1307 986 858 800 (D)
## 4 101 Forestry a~ Arizona 598 (T) (T) (D) (D) (D) 363
## 5 101 Forestry a~ Arkans~ 7248 7266 6662 6087 (D) 5716 (D)
## 6 101 Forestry a~ Califo~ 7200 7273 6208 6139 5917 5340 5545
## 7 101 Forestry a~ Colora~ 698 727 693 637 (D) (D) (D)
## 8 101 Forestry a~ Connec~ (T) 531 452 433 431 384 419
## 9 101 Forestry a~ Delawa~ (T) (T) (T) 56 (D) 29 (D)
## 10 101 Forestry a~ Distri~ (T) (T) (T) (D) (D) 5 5
## # ... with 120 more rows, and 14 more variables: `2005` <chr>, `2006` <chr>,
## # `2007` <chr>, `2008` <chr>, `2009` <chr>, `2010` <chr>, `2011` <chr>,
## # `2012` <chr>, `2013` <chr>, `2014` <chr>, `2015` <chr>, `2016` <chr>,
## # `2017` <chr>, `2018` <chr>
# Column-wise overview of the industry description table.
glimpse(x = IND_DESC)
## Rows: 118
## Columns: 3
## $ LineCode <fct> 10, 20, 40, 50, 60, 70, 80, 90, 100, 101, 102, 103, 200...
## $ Description <fct> "Total employment (number of jobs)", "Wage and salary e...
## $ NA <fct> "A count of jobs, both full-time and part-time. It incl...
# Show the first five industry descriptions.
head(x = IND_DESC, n = 5)
## LineCode Description
## 1 10 Total employment (number of jobs)
## 2 20 Wage and salary employment
## 3 40 Proprietors employment
## 4 50 Farm proprietors employment
## 5 60 Nonfarm proprietors employment
## NA
## 1 A count of jobs, both full-time and part-time. It includes wage and salary jobs, sole proprietorships, and individual general partners, but not unpaid family workers nor volunteers.
## 2 Wage and salary employment, also referred to as wage and salary jobs, measures the average annual number of full-time and part-time jobs in each area by place of work. All jobs for which wages and salaries are paid are counted. Although compensation paid to jurors, expert legal witnesses, prisoners, and justices of the peace (for marriage fees), is counted in wages and salaries, these activities are not counted as jobs in wage and salary employment. Corporate directorships are counted as self-employment. The following description of the sources and methods used in estimating wage and salary employment is divided into two sections: Employment in industries covered by unemployment insurance (UI) programs, and employment in industries not covered by UI.
## 3 Consists of farm proprietors employment and nonfarm proprietors employment.
## 4 Consists of sole proprietors and non-corporate partners in the farm industry.
## 5 Consists of the number of nonfarm sole proprietorships and the number of individual general partners in nonfarm partnerships.
# Sanitise the yearly value columns (4:24): the non-numeric markers "(D)" and
# "(T)" are blanked out, then every missing cell is zero-filled.
temp_name <- names(EMP_DAT_CAT)[4:24]
# Work on a plain data.frame and edit the columns in place so they stay
# character vectors (no factor conversion, no column renaming).
EMP_DAT_CAT_SAN <- as.data.frame(EMP_DAT_CAT)
EMP_DAT_CAT_SAN[temp_name] <- lapply(EMP_DAT_CAT_SAN[temp_name], function(x) {
  # Match the markers literally. The former pattern "(D)|(T)" was a regular
  # expression whose parentheses are grouping operators, so it matched any
  # value that merely contained a "D" or a "T".
  x[x %in% c("(D)", "(T)")] <- NA
  x
})
# NOTE(review): blanked values are counted as 0 from here on, which pulls any
# sum or mean down - confirm this is intended.
EMP_DAT_CAT_SAN[is.na(EMP_DAT_CAT_SAN)] <- 0
# EMP_DAT_CAT_SAN
# Cache the sanitised table on disk and load it back from that cache.
saveRDS(object = EMP_DAT_CAT_SAN, file = "EMP_DAT_CAT_SAN")
EMP_DAT_CAT_SAN <- readRDS(file = "EMP_DAT_CAT_SAN")
# Reshape wide -> long: the first three columns stay as identifiers and every
# remaining column becomes one (Year, Value) pair per row.
Long_EMP_DAT_CAT_SAN <- gather(
  EMP_DAT_CAT_SAN,
  key = "Year",
  value = "Value",
  -c(1, 2, 3)
)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Dodged bar chart of the yearly values for New York, California and Texas.
# No industry filter is applied, so rows of every industry line are drawn.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(GeoName %in% c("New York", "California", "Texas")) %>%
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), fill = GeoName)) +
  geom_col(position = position_dodge()) +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  # The previous title ("USSFoodBeverage ... CPI Value in NY") and y label
  # ("Sector") were copy-paste leftovers that did not describe this plot.
  labs(
    title = "No. of jobs in New York, California and Texas (all industry lines)",
    y = "No. of jobs",
    x = "Years"
  )
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 36 rows containing missing values (geom_col).

# Dodged bar chart of the yearly values for the "Health and personal care
# stores" line in New York, California and Texas.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    GeoName %in% c("New York", "California", "Texas"),
    Description %in% c("Health and personal care stores")
  ) %>%
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), fill = GeoName)) +
  geom_col(position = position_dodge()) +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  # The previous title ("USSFoodBeverage ... CPI Value in NY") and y label
  # ("Sector") were copy-paste leftovers that did not describe this plot.
  labs(
    title = "Health and personal care stores: No. of jobs in New York, California and Texas",
    y = "No. of jobs",
    x = "Years"
  )

# Linear-model smooth per state (New York, California, Texas) of the yearly
# values for the "Health and personal care stores" line.
ggplot(
  data = filter(
    select(Long_EMP_DAT_CAT_SAN, "Description", "Year", "Value", "GeoName"),
    GeoName %in% c("New York", "California", "Texas"),
    Description %in% c("Health and personal care stores")
  ),
  mapping = aes(
    x = Year,
    y = as.numeric(Value),
    group = GeoName,
    colour = GeoName,
    fill = GeoName
  )
) +
  geom_smooth(method = "lm") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "Health and personal care stores Data for No. Of Jobs ...",
    y = "Sector",
    x = "Years"
  )
## `geom_smooth()` using formula 'y ~ x'

# Linear-model smooth per area (every GeoName except "United States") of the
# yearly values for the "Health and personal care stores" line.
ggplot(
  data = filter(
    select(Long_EMP_DAT_CAT_SAN, "Description", "Year", "Value", "GeoName"),
    !GeoName %in% c("United States"),
    Description %in% c("Health and personal care stores")
  ),
  mapping = aes(
    x = Year,
    y = as.numeric(Value),
    group = GeoName,
    colour = GeoName,
    fill = GeoName
  )
) +
  geom_smooth(method = "lm") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "black", hjust = 1, size = rel(0.86)
    )
  ) +
  labs(
    title = "Health and personal care stores Data for No. Of Jobs ...",
    y = "Sector",
    x = "Years"
  )
## `geom_smooth()` using formula 'y ~ x'

# Long_EMP_DAT_CAT_SAN %>% select("Description","Year","Value","GeoName") %>% filter(!GeoName %in% c("United States"), Description %in% c("Health and personal care stores")) %>% group_by(Year) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg %>%mean()
# Long_EMP_DAT_CAT_SAN %>% select("Description","Year","Value","GeoName") %>%
# filter(GeoName %in% c("New York"), Description %in% c("Health and personal care stores"))
# %>% group_by(GeoName) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg
# Yearly values of the "Health and personal care stores" line in New York,
# drawn as a line with points.
Long_EMP_DAT_CAT_SAN %>%
  select("Description", "Year", "Value", "GeoName") %>%
  filter(
    GeoName %in% c("New York"),
    Description %in% c("Health and personal care stores")
  ) %>%
  select(Year, Value) %>%
  # `aes(x-Year, ...)` was a typo for `x = Year`. Value is converted with
  # as.numeric() as in the other plots; group = 1 joins the points across the
  # discrete Year axis.
  ggplot(mapping = aes(x = Year, y = as.numeric(Value), group = 1)) +
  # A bare geom_abline() ignores the data and only draws the y = x reference
  # line, so the series is drawn with geom_line()/geom_point() instead.
  geom_line() +
  geom_point()

# group_by(GeoName) %>% summarise(Avg= mean(as.numeric(Value))) %>% .$Avg
# Load the state boundary data from a local RDS file (nothing is downloaded
# here; the object is later used for the map polygons and the name lookup).
states <- readRDS("state.rds")
# Replace every missing cell with 0. is.na() on a data frame returns a logical
# matrix, and indexing with that matrix touches only the NA cells. The former
# `[which(is.na(df)), ] = 0` used linear cell positions as row numbers, which
# overwrote whole unrelated rows (or appended new ones).
Long_EMP_DAT_CAT_SAN[is.na(Long_EMP_DAT_CAT_SAN)] <- 0
# 1998 values of the "Health and personal care stores" line for every area
# except "United States", with the state abbreviation looked up by name.
Long_EMP_DAT_CAT_SAN1 <- left_join(
  filter(
    select(Long_EMP_DAT_CAT_SAN, "Description", "Year", "Value", "GeoName"),
    !GeoName %in% c("United States"),
    Year == "1998",
    Description %in% c("Health and personal care stores")
  ),
  data.frame(GeoName = states$NAME, state = states$STUSPS),
  by = c("GeoName" = "GeoName")
)
# Keep only rows that matched a state and drop the 4th column (GeoName).
Long_EMP_DAT_CAT_SAN1 <-
  Long_EMP_DAT_CAT_SAN1[!is.na(Long_EMP_DAT_CAT_SAN1$state), -c(4)]
## Warning: Column `GeoName` joining character vector and factor, coercing into
## character vector
# Merge the 1998 values onto the state shapes, matching the shapes' STUSPS
# column against the `state` column of the data.
Long_EMP_DAT_CAT_SAN1 <- geo_join(
  spatial_data = states,
  data_frame = Long_EMP_DAT_CAT_SAN1,
  by_sp = "STUSPS",
  by_df = "state"
)
## Warning: Column `STUSPS`/`state` joining character vector and factor, coercing
## into character vector
# Drop shapes without a value. A logical mask is used instead of
# `-which(is.na(...))`, which selects zero rows when nothing is NA.
Long_EMP_DAT_CAT_SAN1 <-
  Long_EMP_DAT_CAT_SAN1[!is.na(Long_EMP_DAT_CAT_SAN1$Value), ]
# data.frame(states$NAME,states$STUSPS)
# Convert the values to numeric once, so the palette, the popups and the map
# all work from the same column.
Long_EMP_DAT_CAT_SAN1$Value <- as.numeric(Long_EMP_DAT_CAT_SAN1$Value)
# Setting up the pop up text: state name and value on separate lines.
# The old call closed as.character() around all three pieces, so only the
# name was kept; "/n" was also not a line break. Popups are rendered as HTML,
# hence <br/>.
popup_sb <- paste0(
  "Total: ",
  Long_EMP_DAT_CAT_SAN1$NAME,
  "<br/>",
  Long_EMP_DAT_CAT_SAN1$Value
)
library(rbin)
bins <- summary(Long_EMP_DAT_CAT_SAN1$Value)
# Creating a color palette based on the number range in the Value column.
# (Two earlier `pal` assignments were overwritten before use and are removed.)
pal <- colorBin(
  "YlOrRd",
  domain = Long_EMP_DAT_CAT_SAN1$Value,
  bins = 10,
  pretty = TRUE,
  na.color = "#808080",
  alpha = FALSE,
  reverse = FALSE,
  right = FALSE
)
# Choropleth of the 1998 "Health and personal care stores" values by state.
Long_EMP_DAT_CAT_SAN1 %>%
  leaflet() %>%
  addTiles() %>%
  setView(-98.483330, 38.712046, zoom = 4) %>%
  addPolygons(
    color = "#444444", weight = 1, smoothFactor = 0.5,
    opacity = 1.0, fillOpacity = 0.5,
    fillColor = ~pal(Value),
    popup = ~popup_sb,
    highlightOptions = highlightOptions(
      color = "white", weight = 2, bringToFront = TRUE
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~Value,
    position = "bottomright",
    # The legend previously read "Starbucks", a copy-paste leftover that is
    # unrelated to the mapped data.
    title = "Health and personal care stores (1998)"
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette YlOrRd is 9
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette YlOrRd is 9
## Returning the palette you asked for with that many colors
# write_rds(Long_EMP_DAT_CAT_SAN,"Long_EMP_DAT_CAT_SAN")
# EMP_DAT_CAT_SAN
# Industry line analysed in the remaining steps.
industry <- "Health and personal care stores"
# All years of that line for every area except "United States", with the
# state abbreviation looked up by name.
Long_EMP_DAT_CAT_SAN2 <- left_join(
  filter(
    select(Long_EMP_DAT_CAT_SAN, "Description", "Year", "Value", "GeoName"),
    !GeoName %in% c("United States"),
    Description %in% c(industry)
  ),
  data.frame(GeoName = states$NAME, state = states$STUSPS),
  by = c("GeoName" = "GeoName")
)
# Keep only rows that matched a state and drop the 4th column (GeoName).
Long_EMP_DAT_CAT_SAN2 <-
  Long_EMP_DAT_CAT_SAN2[!is.na(Long_EMP_DAT_CAT_SAN2$state), -c(4)]
## Warning: Column `GeoName` joining character vector and factor, coercing into
## character vector
# Values arrive as text; make them numeric before averaging.
Long_EMP_DAT_CAT_SAN2[["Value"]] <- as.numeric(Long_EMP_DAT_CAT_SAN2[["Value"]])
# Per-state mean over all years, used as the data for the map layer.
m_data_indus <- summarise(
  group_by(Long_EMP_DAT_CAT_SAN2, state),
  Value = mean(Value)
)
# Merge the means onto the state shapes (STUSPS on the shapes, `state` on the
# data).
m_data_indus <- geo_join(
  spatial_data = states,
  data_frame = m_data_indus,
  by_sp = "STUSPS",
  by_df = "state"
)
## Warning: Column `STUSPS`/`state` joining character vector and factor, coercing
## into character vector
# Drop shapes without a value. A logical mask is used instead of
# `-which(is.na(...))`, which selects zero rows when nothing is NA.
m_data_indus <- m_data_indus[!is.na(m_data_indus$Value), ]
# Long_EMP_DAT_CAT_SAN1$Year %>% unique()
# # unique(Long_EMP_DAT_CAT_SAN1$Description)
# EMP_DAT_CAT_SAN[is.na(EMP_DAT_CAT_SAN)] <- 0
# Long_EMP_DAT_CAT_SAN2[is.na(Long_EMP_DAT_CAT_SAN2),]
# Long_EMP_DAT_CAT_SAN2 <- Long_EMP_DAT_CAT_SAN2[-which(is.na(Long_EMP_DAT_CAT_SAN2$Value)),]
# Long_EMP_DAT_CAT_SAN2$Value <- as.numeric(Long_EMP_DAT_CAT_SAN2$Value)
# Long_EMP_DAT_CAT_SAN2$Description
# Column-wise overview of the per-state averages joined to the shapes.
glimpse(x = m_data_indus)
## Rows: 51
## Columns: 12
## $ STATEFP <chr> "12", "30", "27", "24", "45", "23", "15", "11", "44", "31"...
## $ STATENS <chr> "00294478", "00767982", "00662849", "01714934", "01779799"...
## $ AFFGEOID <chr> "0400000US12", "0400000US30", "0400000US27", "0400000US24"...
## $ GEOID <chr> "12", "30", "27", "24", "45", "23", "15", "11", "44", "31"...
## $ STUSPS <chr> "FL", "MT", "MN", "MD", "SC", "ME", "HI", "DC", "RI", "NE"...
## $ NAME <chr> "Florida", "Montana", "Minnesota", "Maryland", "South Caro...
## $ LSAD <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00"...
## $ ALAND <dbl> 138947364717, 376966832749, 206230065476, 25151726296, 778...
## $ AWATER <dbl> 31362872853, 3869031338, 18942261495, 6979340970, 50758745...
## $ Value <dbl> 80545.190, 2685.143, 18439.238, 22093.143, 17153.286, 4214...
## $ rank <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-80.17628 2..., MULTIPOLYGON ...
# Compare New York with the per-year mean of all other states for `industry`.
# This chunk used to appear twice verbatim; one copy is kept.
restofUS <- Long_EMP_DAT_CAT_SAN2 %>%
  filter(state != "NY") %>%
  group_by(Year) %>%
  summarise(Value = mean(Value))
# Constant label so the layer below can share the `group = state` aesthetic.
restofUS$state <- "REST"
Long_EMP_DAT_CAT_SAN2 %>%
  filter(state == "NY") %>%
  ggplot(mapping = aes(x = Year, y = Value, group = state)) +
  geom_line() +
  geom_point() +
  # Red points: mean of the other states for the same year.
  geom_point(data = restofUS, color = "red") +
  theme(
    axis.text.x = element_text(
      angle = 60, colour = "gray", hjust = 1, size = rel(0.86)
    )
  ) +
  # The previous title ("USSFoodBeverage ... CPI Value in NY") and y label
  # ("Sector") were copy-paste leftovers that did not describe this plot.
  labs(
    title = paste0(industry, ": New York vs. mean of the other states (red)"),
    y = "No. of jobs",
    x = "Years"
  )

# Setting up the pop up text: state name and formatted mean on separate lines.
# Popups are rendered as HTML, where "\n" does not produce a line break, so
# <br/> is used as the separator.
popup_sb <- paste0(
  "Total: ",
  m_data_indus$NAME,
  "<br/>",
  format(m_data_indus$Value, nsmall = 3, digits = 3)
)
library(rbin)
# The summary() statistics (min, quartiles, median, mean, max) serve as the
# cut points of the binned palette.
bins <- summary(m_data_indus$Value)
pal <- colorBin(
  "YlOrRd",
  domain = m_data_indus$Value,
  bins = bins,
  pretty = TRUE,
  na.color = "#808080",
  alpha = FALSE,
  reverse = FALSE,
  right = FALSE
)
# pal <- colorBin("YlOrRd", domain = (as.numeric(Long_EMP_DAT_CAT_SAN1$Value)),bins=bins)
# Long_EMP_DAT_CAT_SAN1$Value <- as.numeric(Long_EMP_DAT_CAT_SAN1$Value)
# Choropleth of the per-state averages for `industry`, centred on the
# continental US.
leaflet(data = m_data_indus) %>%
  addTiles() %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    color = "#444444",
    weight = 1,
    smoothFactor = 0.5,
    opacity = 1.0,
    fillOpacity = 0.5,
    fillColor = ~pal(Value),
    popup = ~popup_sb,
    highlightOptions = highlightOptions(
      color = "white",
      weight = 2,
      bringToFront = TRUE
    )
  ) %>%
  addLegend(
    pal = pal,
    values = m_data_indus$Value,
    position = "bottomright",
    title = industry
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'