library(rvest)
## Warning: package 'rvest' was built under R version 3.4.4
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.4.4
library(knitr)
library(tm)
## Warning: package 'tm' was built under R version 3.4.4
## Loading required package: NLP
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 1.4.2 v readr 1.1.1
## v tidyr 0.8.0 v purrr 0.2.4
## v tibble 1.4.2 v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.4
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter() masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag() masks stats::lag()
## x purrr::pluck() masks rvest::pluck()
library(SnowballC)
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.4
## Loading required package: RColorBrewer
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RCurl)
## Loading required package: bitops
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
library(bitops)
library(knitr)
Import files to determine which are relevant to our goal:
# Load the term-frequency table. It is read with header = FALSE, so the
# columns are named V1, V2, V3, ... and the file's first row arrives as data.
TermFrequency <- read.csv(
  url("https://raw.githubusercontent.com/Shetura36/Data-607-Assignments/master/Project3/TermFrequency_adjusted_10.csv"),
  header = FALSE,
  sep = ","
)

# Drop the first row (presumably the file's own header row -- confirm against
# the CSV).
TermFrequency <- TermFrequency[-c(1), ]

# If the dropped row held column names, V3 was parsed as text (a factor on
# R < 4.0) rather than as numbers. Convert it to numeric so the bar heights
# below are drawn on a continuous axis instead of a discrete axis ordered by
# label. Going through as.character() first yields the printed values, not
# the factor's internal level codes.
TermFrequency$V3 <- as.numeric(as.character(TermFrequency$V3))

# Select the top 13 terms used in Data Science jobs on Indeed. These 13 were
# chosen because they appear the most and can be used as a link point for most
# of the other top 100 terms.
Top13 <- head(TermFrequency, n = 13)
write.csv(Top13, file = "Top13RelevantDataScienceTerms.csv")

# Bar chart of term (V2) against frequency (V3); the x-axis labels are rotated
# 60 degrees so that the term labels do not overlap.
dsterms <- ggplot(data = Top13, aes(x = V2, y = V3)) +
  geom_bar(stat = "identity") +
  labs(x = "Terms", y = "Frequency")
dsterms + theme(axis.text.x = element_text(angle = 60, hjust = 1))