Use dplyr to summarize the job skills data. For each job description, I check whether a particular skill is mentioned, assigning a value of 1 if it is and 0 if it is not. Next, I total the matches for each skill and calculate the percentage of job descriptions that mention it. Finally, the tibble is sorted in descending order of the totals.
# Summarize how often each skill keyword is mentioned across the job
# descriptions held in `indeedDesc`.
#
# Produces `df` (and its copy `cloud_df`): one row per skill with columns
#   skill   - skill label
#   value   - number of job descriptions that mention the skill
#   Percent - `value` as a whole-number percentage of all job descriptions
# sorted by `value`, most frequently mentioned skill first.

# Matchers keyed by skill label. Kept in a list() (not c()) so the stringr
# modifier classes survive. Most skills are case-insensitive substring matches,
# so e.g. "SQL" also counts "MySQL"/"NoSQL" and "Mongo" counts "MongoDB".
# Short tokens that occur inside ordinary English words use word boundaries
# instead.
skill_patterns <- list(
  Python = fixed("python", ignore_case = TRUE),
  # Whole word only: a plain substring match also hits "excellent".
  Excel = regex("\\bexcel\\b", ignore_case = TRUE),
  Mongodb = fixed("Mongo", ignore_case = TRUE),
  # Case-sensitive standalone "R". A case-insensitive "R," match fires on
  # every "engineer," / "developer,". The lookahead skips "R&D".
  R = regex("\\bR\\b(?!&)"),
  CompSci = fixed("Computer Science", ignore_case = TRUE),
  Communication = fixed("Communication Skills", ignore_case = TRUE),
  SQL = fixed("SQL", ignore_case = TRUE),
  AI = fixed("Artificial", ignore_case = TRUE),
  Predictive = fixed("predictive", ignore_case = TRUE),
  ML = fixed("machine learning", ignore_case = TRUE),
  Statistics = fixed("Statistics", ignore_case = TRUE),
  BigData = fixed("Big Data", ignore_case = TRUE),
  Neural = fixed("Neural", ignore_case = TRUE),
  Visualization = fixed("visualization", ignore_case = TRUE),
  Regression = fixed("Regression", ignore_case = TRUE),
  TextMining = fixed("text mining", ignore_case = TRUE),
  Matlab = fixed("Matlab", ignore_case = TRUE),
  # Whole word only: a plain substring match also hits "disaster", "Kansas".
  SAS = regex("\\bsas\\b", ignore_case = TRUE),
  Cloud = fixed("Cloud", ignore_case = TRUE)
)

# Count, for every pattern, the job descriptions that match it.
#
# descriptions: character vector (or anything `as_tibble()` turns into a
#   single `value` column) with one job description per element.
# patterns: named list of stringr patterns; the names become skill labels.
#
# Returns a tibble with columns `skill`, `value` (match count) and `Percent`
# (share of all descriptions, rounded to a whole number), sorted by
# descending `value` with ties broken alphabetically by `skill`.
count_skill_mentions <- function(descriptions, patterns) {
  job_text <- as_tibble(descriptions) %>%
    rename(JobDesc = value) %>%
    mutate(
      # Join wrapped lines so multi-word skills split by a line break match.
      JobDesc = str_replace_all(JobDesc, fixed("\n"), " "),
      # Drop bullet characters, both the real middle dot and the escaped
      # "<U+00B7>" text form. The "+" must be escaped or it is a quantifier.
      JobDesc = str_replace_all(JobDesc, "<U\\+00B7>|\u00b7", "")
    ) %>%
    pull(JobDesc)

  n_jobs <- length(job_text)

  # na.rm: a missing description counts as "no mention" rather than turning
  # every total into NA.
  mentions <- vapply(
    patterns,
    function(pattern) sum(str_detect(job_text, pattern), na.rm = TRUE),
    numeric(1)
  )

  tibble(
    skill = names(mentions),
    value = unname(mentions),
    # Round once, on the aggregated share. Rounding each job's 100 / n
    # contribution before summing collapses to 0 for large n.
    Percent = if (n_jobs > 0) round(value / n_jobs * 100) else 0
  ) %>%
    arrange(desc(value), skill)
}

df <- count_skill_mentions(indeedDesc, skill_patterns)

# Copy of the summary kept under a separate name.
cloud_df <- df