Data Source

Step 1. Getting and preparing the data.

#install.packages("RMySQL")

library(RMySQL)
## Loading required package: DBI
mydb = dbConnect(MySQL(), user='root', password='South@30', dbname='Jobs', host='localhost')

dbListTables(mydb)
## [1] "companies"    "jobsxwalk"    "output_for_r" "skills"      
## [5] "tblcompanies" "tbljobs"      "tbljobsxwalk" "tbllocations"
## [9] "tblskills"
MySQL <- dbSendQuery(mydb, "SELECT * FROM Jobs.output_for_r")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported
## as numeric
Rdata = fetch(MySQL, n=-1)

#MySQL<-read.csv(file='C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/SkillsDataforR.csv',header=FALSE)

#head(MySQL)

#colnames(MySQL)<-c("Skills","SkillsKey","Title","TitleKey","Company","CompanyKey","Location","LocationKey","PKey")

#head(MySQL)

#summary(MySQL)

summary(Rdata)
##     SKILL              SKILLKEY      JOBTITLE             JOBID     
##  Length:30410       Min.   : 1.0   Length:30410       Min.   :   1  
##  Class :character   1st Qu.:12.0   Class :character   1st Qu.: 685  
##  Mode  :character   Median :24.0   Mode  :character   Median :1302  
##                     Mean   :26.1                      Mean   :1417  
##                     3rd Qu.:37.0                      3rd Qu.:2099  
##                     Max.   :56.0                      Max.   :3451  
##    COMPANY              COMPID         LOCATION             LOCID      
##  Length:30410       Min.   :   1.0   Length:30410       Min.   :  1.0  
##  Class :character   1st Qu.: 292.0   Class :character   1st Qu.: 39.0  
##  Mode  :character   Median : 729.0   Mode  :character   Median :137.0  
##                     Mean   : 750.6                      Mean   :236.2  
##                     3rd Qu.:1137.0                      3rd Qu.:387.0  
##                     Max.   :1890.0                      Max.   :934.0  
##        ID       
##  Min.   :    1  
##  1st Qu.: 7603  
##  Median :15206  
##  Mean   :15206  
##  3rd Qu.:22808  
##  Max.   :30410
head(Rdata)
##              SKILL SKILLKEY           JOBTITLE JOBID
## 1              all        1 Sr. Data Scientist     1
## 2           python        2 Sr. Data Scientist     1
## 3                r        3 Sr. Data Scientist     1
## 4         modeling       10 Sr. Data Scientist     1
## 5 Machine learning       11 Sr. Data Scientist     1
## 6              SQL       12 Sr. Data Scientist     1
##                     COMPANY COMPID LOCATION LOCID ID
## 1 Confidential, Outsourcing      1       NA     1  1
## 2 Confidential, Outsourcing      1       NA     1  2
## 3 Confidential, Outsourcing      1       NA     1  3
## 4 Confidential, Outsourcing      1       NA     1  4
## 5 Confidential, Outsourcing      1       NA     1  5
## 6 Confidential, Outsourcing      1       NA     1  6

Step 2. Outputing R data into MongoDB

#install.packages("mongolite")

library(mongolite)

my_collection = mongo(collection = "DataScience_Skills_MongoDB", db = "DataScience") # create connection, database and collection

my_collection$insert(Rdata)
## List of 5
##  $ nInserted  : num 30410
##  $ nMatched   : num 0
##  $ nRemoved   : num 0
##  $ nUpserted  : num 0
##  $ writeErrors: list()

Step 3. Checking row count

my_collection$count()
## [1] 30410