library(rvest)

## Warning: package 'rvest' was built under R version 3.4.4

## Loading required package: xml2

## Warning: package 'xml2' was built under R version 3.4.4

library(knitr)
library(tm)

## Warning: package 'tm' was built under R version 3.4.4

## Loading required package: NLP

library(stringr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.4.4

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:NLP':
## 
##     annotate

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.4.4

## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.2.1 --

## v tibble  1.4.2     v readr   1.1.1
## v tidyr   0.8.0     v purrr   0.2.4
## v tibble  1.4.2     v forcats 0.3.0

## Warning: package 'tibble' was built under R version 3.4.4

## Warning: package 'tidyr' was built under R version 3.4.4

## Warning: package 'readr' was built under R version 3.4.4

## Warning: package 'purrr' was built under R version 3.4.4

## Warning: package 'forcats' was built under R version 3.4.4

## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate()     masks NLP::annotate()
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()

library(SnowballC)
library(wordcloud)

## Warning: package 'wordcloud' was built under R version 3.4.4

## Loading required package: RColorBrewer

library(plotly)

## Warning: package 'plotly' was built under R version 3.4.4

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(RCurl)

## Loading required package: bitops

## 
## Attaching package: 'RCurl'

## The following object is masked from 'package:tidyr':
## 
##     complete

library(bitops)
library(knitr)

Import files to determine which are relevant to our goal:

# Load the adjusted term-frequency table from the project's GitHub repository.
#
# The first line of the file is not data (it was previously read in and then
# dropped as row 1). Skipping it at read time instead lets read.csv() infer
# each column's type from the data rows alone, so numeric columns such as the
# frequency column (V3) stay numeric rather than being coerced to
# factor/character by the header text.
#
# header = FALSE is kept on purpose so the columns retain the default names
# V1, V2, V3, ... Row names now start at 1 instead of 2.
TermFrequency <- read.csv(
  url("https://raw.githubusercontent.com/Shetura36/Data-607-Assignments/master/Project3/TermFrequency_adjusted_10.csv"),
  header = FALSE,
  sep = ",",
  skip = 1
)

We now need to determine the relevant terms that encapsulate all the terms we might come across and that can be grouped together.

# Keep the first 13 rows of the term table as the top 13 terms used in Data
# Science jobs on Indeed. These 13 were chosen because they appear the most
# and can serve as a link point for most of the other top-100 terms.
# NOTE(review): this relies on TermFrequency already being ordered by
# descending frequency -- confirm against the upstream file.
Top13 <- TermFrequency %>%
  head(n = 13)

# Save the selection to the working directory; write.csv() also writes the
# row names as the first column by default.
write.csv(x = Top13, file = "Top13RelevantDataScienceTerms.csv")

Visualization of Relevant Term Frequency

# Bar chart of the selected terms: V2 is mapped to the x axis (labelled
# "Terms") and V3 to the bar height (labelled "Frequency").
# stat = "identity" draws each bar at the value stored in V3 instead of
# counting rows.
dsterms <- ggplot(Top13, aes(V2, V3)) +
  geom_bar(stat = "identity") +
  labs(x = "Terms", y = "Frequency")

# Display the chart with the x-axis labels rotated 60 degrees and
# right-aligned. The theme is applied only to the displayed plot; `dsterms`
# itself is left unthemed.
dsterms +
  theme(axis.text.x = element_text(hjust = 1, angle = 60))

This data is quite telling: it shows that even though hard technical skills are crucial, soft skills are just as important, since teamwork and communication rank quite high on the list of competencies that recruiting teams are searching for.

Project 3 Data Visualization

Alejandro D. Osborne

March 24, 2018

We now need to determine the relevant terms that encapsulate all the terms we might come across and that can be grouped together.

Visualization of Relevant Term Frequency

This data is quite telling: it shows that even though hard technical skills are crucial, soft skills are just as important, since teamwork and communication rank quite high on the list of competencies that recruiting teams are searching for.