library(arules)
## Warning: package 'arules' was built under R version 4.2.1
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 4.2.1
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:stats':
##
## filter
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 4.2.1
# Load the job records as basket-format transactions: one comma-separated
# record per line, each field value becoming an item.
# header = TRUE: the first line of the CSV holds the column names
# (company_location, company_size, ...). Without it that line is read as a
# transaction of its own and the column names show up as items.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine; consider a project-relative path.
DSSalary <- read.transactions(
  "D:/archive (2)/SM_DSJobs_2.csv",
  header = TRUE,
  sep = ","
)
## Warning in asMethod(object): removing duplicated items in transactions
# Overview of the transaction set: dimensions, density, most frequent items,
# and the distribution of transaction lengths.
summary(object = DSSalary)
## transactions as itemMatrix in sparse format with
## 246 rows (elements/itemsets/transactions) and
## 306 columns (items) and a density of 0.02322121
##
## most frequent items:
## 100 L US MI SE (Other)
## 134 132 110 103 77 1192
##
## element (itemset/transaction) length distribution:
## sizes
## 6 7 8
## 10 200 36
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.000 7.000 7.000 7.106 7.000 8.000
##
## includes extended item information - examples:
## labels
## 1 <100K
## 2 <10K
## 3 <15K
# Print the first transactions for a visual sanity check of the parsed items.
# head() returns at most 20 transactions, so this also works when the data
# set holds fewer than 20 (indexing with 1:20 would be out of bounds then).
inspect(head(DSSalary, n = 20))
## items
## [1] {company_location,
## company_size,
## employee_residence,
## experience_level,
## job_title,
## remote_ratio,
## Salary bin,
## salary_in_usd}
## [2] {<10K,
## 0,
## 2876,
## Data Scientist,
## MI,
## MX,
## S}
## [3] {<10K,
## 100,
## 4000,
## Data Engineer,
## IR,
## M,
## MI}
## [4] {<10K,
## 0,
## 4000,
## Data Scientist,
## EN,
## M,
## VN}
## [5] {<10K,
## 3D Computer Vision Researcher,
## 50,
## 5423,
## IN,
## M,
## MI}
## [6] {<10K,
## 100,
## 5695,
## Data Scientist,
## IN,
## MI,
## S,
## US}
## [7] {<10K,
## 50,
## 5707,
## Data Science Consultant,
## EN,
## IN,
## M}
## [8] {<10K,
## 0,
## 5898,
## Big Data Engineer,
## CH,
## EN,
## IN,
## L}
## [9] {<10K,
## 0,
## 6072,
## Data Analyst,
## EN,
## IN,
## S}
## [10] {<10K,
## 100,
## 6072,
## IN,
## L,
## MI,
## Product Data Analyst}
## [11] {<10K,
## 50,
## 8000,
## Data Analyst,
## L,
## MI,
## PK}
## [12] {<10K,
## 100,
## 9272,
## BI Data Analyst,
## EN,
## KE,
## S}
## [13] {<15K,
## 100,
## 10000,
## Data Analyst,
## EN,
## NG,
## S}
## [14] {<15K,
## 12000,
## 50,
## M,
## Machine Learning Scientist,
## MI,
## PK}
## [15] {<15K,
## 100,
## 12000,
## AI Scientist,
## EN,
## M,
## PK,
## US}
## [16] {<15K,
## 100,
## 12000,
## AI Scientist,
## BR,
## EN,
## S,
## US}
## [17] {<15K,
## 0,
## 13000,
## BR,
## Data Scientist,
## MI,
## S}
## [18] {<15K,
## 0,
## 13105,
## Data Engineer,
## M,
## MI,
## TR}
## [19] {<15K,
## 100,
## 13400,
## Data Scientist,
## EN,
## L,
## UA}
## [20] {<20K,
## 100,
## 15966,
## DE,
## EN,
## ML Engineer,
## S}
# Bar chart of the 20 items that occur in the most transactions, using raw
# counts (type = "absolute") rather than relative support.
itemFrequencyPlot(
  DSSalary,
  topN = 20,
  type = "absolute",
  main = "top 20 most frequent Items"
)
# Note: the `echo = FALSE` parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.