# Word clouds of the soft and hard skills that highly rated (5-star) companies
# look for in engineer vs. analyst/scientist positions:
# Postings from 5-star companies, matched against the skills table.
# inner_join() keeps only postings whose job_id has a matching `id` in
# `skills`; the `skill` column used further down comes from `skills`.
# NOTE(review): presumably `skills` holds one row per (id, skill), so each
# posting is repeated once per listed skill -- confirm.
df.stars.5 <- inner_join(
  df_all %>%
    filter(No_of_Stars == 5) %>%
    select(
      job_id, Queried_Salary, Company_Industry,
      analyst, engineer, scientist, junior, senior
    ),
  skills,
  by = c("job_id" = "id")
)
# Skill frequencies for engineer postings at 5-star companies.
# Spellings that differ only in letter case (e.g. "Time management" vs
# "Time Management") are counted as one skill, so a single skill is not split
# into several smaller words in the cloud. The first spelling encountered is
# kept as the display label.
df.stars.5.engg <- df.stars.5 %>%
  filter(engineer == 1) %>%
  group_by(skill_key = tolower(skill)) %>%
  summarize(skill = first(skill), total_count = n()) %>%
  # Drop the helper key so the result keeps the original two columns.
  select(skill, total_count)
# Skill frequencies for postings flagged as analyst and/or scientist at
# 5-star companies.
# Spellings that differ only in letter case (e.g. "Time management" vs
# "Time Management") are counted as one skill, so a single skill is not split
# into several smaller words in the cloud. The first spelling encountered is
# kept as the display label.
df.stars.5.ana.sci <- df.stars.5 %>%
  filter(analyst + scientist > 0) %>%
  group_by(skill_key = tolower(skill)) %>%
  summarize(skill = first(skill), total_count = n()) %>%
  # Drop the helper key so the result keeps the original two columns.
  select(skill, total_count)
# Word cloud of skills requested in engineer postings.
# The seed fixes the random word placement so the figure is reproducible.
set.seed(1234)
# `scale` is set explicitly to a smaller range than wordcloud's default
# (c(4, 0.5)): with the default sizes many multi-word skills could not be
# placed and were dropped with a "could not be fit on page" warning.
# Tune the range if the plotting device is resized.
wordcloud(
  words = df.stars.5.engg$skill,
  freq = df.stars.5.engg$total_count,
  min.freq = 1,
  max.words = 100,
  scale = c(2.5, 0.4),
  random.order = FALSE,
  rot.per = 0.10,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Design Experience could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Microsoft SQL Server could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Responsible could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Shell Scripting could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Business Intelligence could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Google Cloud Platform could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Load Balancing could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Natural Language Processing could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Oral communication skills could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : PostgreSQL could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Power BI could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Presentation could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Problem solving could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Responsibility could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Scheduling could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : TensorFlow could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Teradata could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : TS/SCI Clearance could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Work-Life Balance could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : Writing could not be fit on page. It will not be plotted.
## Warning in wordcloud(words = df.stars.5.engg$skill, freq = df.stars.
## 5.engg$total_count, : ZooKeeper could not be fit on page. It will not be
## plotted.

# Word cloud of skills requested in analyst/scientist postings.
# Seeded here as well so the layout does not depend on how much randomness
# earlier plotting code consumed.
set.seed(1234)
# `scale` is set explicitly to a smaller range than wordcloud's default
# (c(4, 0.5)): with the default sizes many multi-word skills could not be
# placed and were dropped with a "could not be fit on page" warning.
# Tune the range if the plotting device is resized.
wordcloud(
  words = df.stars.5.ana.sci$skill,
  freq = df.stars.5.ana.sci$total_count,
  min.freq = 1,
  max.words = 100,
  scale = c(2.5, 0.4),
  random.order = FALSE,
  rot.per = 0.10,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Entrepreneurial could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Interpersonal skills could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Natural Language Processing could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Organizational skills could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Personal Skills could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Presentation Skills could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Problem solving could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Business Analysis could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Data-driven decision-making could not be fit on page.
## It will not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Effective communicator could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Integrated Library System could not be fit on page. It
## will not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Leadership Experience could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Project management could not be fit on page. It will
## not be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Shell Scripting could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Team-oriented could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Team Building could not be fit on page. It will not be
## plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Time management could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Time Management could not be fit on page. It will not
## be plotted.
## Warning in wordcloud(words = df.stars.5.ana.sci$skill, freq = df.stars.
## 5.ana.sci$total_count, : Written communication skills could not be fit on page.
## It will not be plotted.

# Salary Range by titles given by high rating/star companies:
# Per salary band: number of 5-star-company postings and how many of them are
# flagged analyst / engineer / scientist / junior / senior.
# df.stars.5 has been joined to `skills`, so a posting can occur on several
# rows (one per matched skill). Collapse to one row per job_id first;
# otherwise every total counts job-skill pairs instead of postings. Totals
# rendered before this change were computed on the joined rows and can
# therefore be larger.
# NOTE(review): assumes job_id uniquely identifies a posting -- confirm.
df.stars.5.Salary <- df.stars.5 %>%
  distinct(job_id, .keep_all = TRUE) %>%
  group_by(Queried_Salary) %>%
  summarize(
    total_count = n(),
    ana_total = sum(analyst),
    eng_totals = sum(engineer),
    sci_total = sum(scientist),
    jr_total = sum(junior),
    sr_total = sum(senior)
  )
kable(df.stars.5.Salary)
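| Queried_Salary | total_count | ana_total | eng_totals | sci_total | jr_total | sr_total |
|:---------------|------------:|----------:|-----------:|----------:|---------:|---------:|
| <80000         |          37 |        37 |          0 |         0 |        0 |        0 |
| >160000        |          47 |        17 |          0 |        30 |        0 |        0 |
| 100000-119999  |         111 |        33 |         54 |        24 |        0 |        0 |
| 120000-139999  |         136 |        55 |         34 |        77 |        0 |        9 |
| 140000-159999  |          59 |        17 |         28 |        14 |        0 |        8 |
| 80000-99999    |          61 |        25 |         36 |         0 |        0 |       13 |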
# ==> From the table above: there are no junior-level roles, and no engineer
# roles paying more than 160,000 USD.
# State-wise job postings for data science jobs:
# Choropleth of the number of postings per US state.

# Number of rows in df_all per Location value.
df.states <- df_all %>%
  select(job_id, Location) %>%
  group_by(Location) %>%
  summarize(total_count = n())

# State outline vertices; `region` is the key the outlines are joined on.
all_states <- map_data("state")
# stateFromLower() is defined elsewhere.
# NOTE(review): presumably it converts Location codes into the lower-case
# state names used in all_states$region -- confirm.
df.states$region <- stateFromLower(df.states$Location)

Total <- merge(all_states, df.states, by = "region")
Total <- Total[Total$region != "REMOTE", ]
# merge() does not keep the input row order, but geom_polygon() draws the
# vertices in row order; restore the drawing sequence stored in `order`,
# otherwise state outlines can come out scrambled.
Total <- Total[order(Total$order), ]

# Refer to the column by bare name inside aes(): `Total$total_count` bypasses
# ggplot2's data masking and ends up as the legend title.
p <- ggplot(
  Total,
  aes(x = long, y = lat, group = group, fill = total_count)
) +
  geom_polygon(colour = "white") +
  scale_fill_continuous(low = "#56B4E9", high = "#0072B2", guide = "colorbar")
p
