Web Scraping Programmer Jobs With R and rvest

Background
- Pendahuluan
- Script Scraping
Deskripsi Data
- Wordcloud
- Grafik
- Peta
Ringkasan

Background

Pendahuluan

Berdasarkan laporan The Future of Jobs dari World Economic Forum, terdapat sejumlah profesi yang paling dibutuhkan di masa depan, khususnya profesi yang melibatkan ilmu sains dan teknologi. Beberapa profesi yang paling menjanjikan yaitu Software Developer, Web/Mobile App Developer, dan profesi yang berkaitan dengan dunia IT.

Tidak dapat dimungkiri bahwa di era digital saat ini hampir semua perusahaan membutuhkan sistem komputerisasi untuk memajukan setiap unit bisnis mereka. Maka tidak heran profesi di dunia IT semakin dicari dan dibutuhkan. Namun, apakah kebutuhan sumber daya manusia (SDM) untuk profesi ini di Indonesia sudah merata di masing-masing daerah? Skill pemrograman apa saja yang paling banyak dibutuhkan perusahaan? Untuk menjawab pertanyaan tersebut, dilakukan riset dengan cara web scraping pada salah satu situs pencari kerja, yaitu Indeed. Riset dilakukan pada bulan Mei 2019 dengan kata kunci pencarian “programmer” dan menghasilkan 387 hasil pencarian.

Script Scraping

library(tidyverse)
library(rvest)
library(xml2)

# Exploratory scrape of the first Indeed result page for "programmer".
# Every extraction is shown with an XPath query and with a CSS selector; the
# unassigned expressions print their result when run interactively.
url <- "https://id.indeed.com/lowongan-kerja?q=programmer&l="
page <- xml2::read_html(url)

# get the job title (XPath)
# The query starts with `//`, so it already searches the whole document and no
# preliminary html_nodes("div") step is needed.
job_title <- page %>%
  rvest::html_nodes(xpath = '//*[@data-tn-element = "jobTitle"]') %>%
  rvest::html_attr("title")

# get the job title (CSS)
page %>%
  rvest::html_nodes('[data-tn-element="jobTitle"]') %>%
  rvest::html_attr("title")

# or, restricted to <a> elements
page %>%
  rvest::html_nodes('a[data-tn-element="jobTitle"]') %>%
  rvest::html_attr("title")

# get company location (XPath)
page %>%
  rvest::html_nodes(xpath = '//*[@class="location"]') %>%
  rvest::html_text() %>%
  stringi::stri_trim_both()

# get company name (XPath)
page %>%
  rvest::html_nodes(xpath = '//*[@class="company"]') %>%
  rvest::html_text() %>%
  stringi::stri_trim_both()

# get company location (CSS)
# [class="..."] matches the exact attribute value, like the XPath test above.
page %>%
  rvest::html_nodes('[class="location"]') %>%
  rvest::html_text() %>%
  stringi::stri_trim_both()

# get company name (CSS)
page %>%
  rvest::html_nodes('[class="company"]') %>%
  rvest::html_text() %>%
  stringi::stri_trim_both()

# get links (XPath)
page %>%
  rvest::html_nodes(xpath = '//*[@data-tn-element="jobTitle"]') %>%
  rvest::html_attr("href")

# get links (CSS)
page %>%
  rvest::html_nodes('[data-tn-element="jobTitle"]') %>%
  rvest::html_attr("href")

# get job description (XPath)
# NOTE(review): this runs against the search-result page, while the scraping
# loop further down reads descriptions from each job's own page using a
# different class value -- confirm which selector is intended here.
page %>%
  rvest::html_nodes(xpath = '//*[@class="jobsearch-jobDescriptionText"]') %>%
  rvest::html_text() %>%
  stringi::stri_trim_both()

# Result offsets to fetch; each one is sent as the `start` query parameter.
page_result_start <- 10 # offset of the first result page fetched
page_result_end <- 1510 # offset of the last result page fetched
page_results <- seq(from = page_result_start, to = page_result_end, by = 10)

first_page_url <- "https://id.indeed.com/lowongan-kerja?q=programmer&l="
site_root <- "https://id.indeed.com/"

# One data frame per result page, bound together once after the loop instead
# of growing full_df with rbind() on every iteration.
page_dfs <- vector("list", length(page_results))
for (page_idx in seq_along(page_results)) {
  url <- paste0(first_page_url, "&start=", page_results[page_idx])
  page <- xml2::read_html(url)
  # Sys.sleep pauses R for two seconds before it resumes.
  # Putting it here avoids error messages such as
  # "Error in open.connection(con, "rb") : Timeout was reached"
  Sys.sleep(2)

  # get the job title
  job_title <- page %>%
    rvest::html_nodes(xpath = '//a[@data-tn-element = "jobTitle"]') %>%
    rvest::html_attr("title")

  # get the company name
  company_name <- page %>%
    rvest::html_nodes(xpath = '//*[@class="company"]') %>%
    rvest::html_text() %>%
    stringi::stri_trim_both()

  # get job location
  job_location <- page %>%
    rvest::html_nodes(xpath = '//*[@class="location"]') %>%
    rvest::html_text() %>%
    stringi::stri_trim_both()

  # get links to the individual job pages
  links <- page %>%
    rvest::html_nodes(xpath = '//*[@data-tn-element="jobTitle"]') %>%
    rvest::html_attr("href")

  # One description per link. The inner loop uses its own index and page
  # variables so it no longer overwrites the outer loop's `i`, `url` and `page`.
  job_description <- rep(NA_character_, length(links))
  for (link_idx in seq_along(links)) {
    # url_absolute() resolves the href against the site root without
    # producing a double slash.
    job_url <- xml2::url_absolute(links[link_idx], site_root)
    job_page <- xml2::read_html(job_url)

    description <- job_page %>%
      rvest::html_nodes(xpath = '//*[@class="jobsearch-JobComponent-description icl-u-xs-mt--md"]') %>%
      rvest::html_text() %>%
      stringi::stri_trim_both()

    # A page may yield zero or several matching nodes; keep NA for none and
    # collapse several into one string so each job has exactly one value.
    if (length(description) > 0) {
      job_description[link_idx] <- paste(description, collapse = " ")
    }
  }

  # NOTE(review): data.frame() needs the four vectors to have compatible
  # lengths, i.e. every listing must expose a title, company and location --
  # confirm against the live page structure.
  page_dfs[[page_idx]] <- data.frame(
    job_title, company_name, job_location, job_description
  )
}
full_df <- do.call(rbind, page_dfs)

Deskripsi Data

Wordcloud

#Load library
library(rmdformats)
library(tm)
library(wordcloud2)

# Read the scraped text and split every line on tabs; each resulting field
# becomes one document of the corpus.
text <- readLines("E:/BELAJAR/Scrape Indeed/app2.txt")
text <- unlist(strsplit(text, "\t"))

komenc <- Corpus(VectorSource(text))
head(komenc)

## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
twitclean <- komenc
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  twitclean <- tm_map(twitclean, clean_step)
}

# Remove stopwords manually
twitclean <- tm_map(twitclean, removeWords, c("and"))

## Warning in tm_map.SimpleCorpus(twitclean, removeWords, c("and")):
## transformation drops documents

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(twitclean)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

library(wordcloud2)
# Diamond-shaped word cloud of the word/frequency table `d`
wordcloud2(
  data = d,
  size = 0.5,
  color = "random-light",
  backgroundColor = "white",
  shape = "diamond"
)

Grafik

Most in-Demand Programming Skill

library(dplyr)
# Keep the first 20 rows of the frequency table `d`
app <- d[1:20, ]
names(app) <- c("tools", "freq") # rename the two columns
app <- app[order(app$freq), ] # ascending by frequency
# Fix the factor levels to the sorted order so the plot keeps it
app$tools <- factor(app$tools, levels = app$tools)

library(ggplot2)
theme_set(theme_minimal())
# Bar chart: one bar per term, height = frequency
ggplot(data = app, mapping = aes(x = tools, y = freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Most in-Demand Programming Skill",
    subtitle = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Read the location text and split every line on tabs
loc <- readLines("E:/BELAJAR/Scrape Indeed/loc.txt")
loc <- unlist(strsplit(loc, "\t"))

location <- Corpus(VectorSource(loc))

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
loc2 <- location
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  loc2 <- tm_map(loc2, clean_step)
}

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(loc2)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
k <- data.frame(word = names(v), freq = v)

Programmer Job Vacancy based on Location

# Rename the columns of the location table and sort it by frequency
names(k) <- c("Area", "Freq")
k <- k[order(k$Freq), ] # ascending by frequency
# Fix the factor levels to the sorted order so the plot keeps it
k$Area <- factor(k$Area, levels = k$Area)

library(ggplot2)
theme_set(theme_minimal())
# Bar chart: one bar per area, height = frequency
ggplot(data = k, mapping = aes(x = Area, y = Freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Where the programmer jobs are",
    subtitle = "area with the most job posting for programmer roles",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Read the Jakarta text and split every line on tabs; `jkt` is reused for the
# raw text, the corpus and finally the word/frequency table.
jkt <- readLines("E:/BELAJAR/Scrape Indeed/jkt.txt")
jkt <- unlist(strsplit(jkt, "\t"))

jkt <- Corpus(VectorSource(jkt))
head(jkt)

## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  jkt <- tm_map(jkt, clean_step)
}

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(jkt)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
jkt <- data.frame(word = names(v), freq = v)

Jakarta

# Keep the 20 most frequent Jakarta terms. `jkt` is already sorted by
# descending frequency; taking the rows from the nationwide table `d` here
# would plot the overall result under the Jakarta title.
jkt <- head(jkt, 20)
colnames(jkt) <- c("Tools", "Freq") # change column names
jkt <- jkt[order(jkt$Freq), ] # sort ascending by frequency
# Store the ordered factor in Tools, the column the plot maps to x, so the
# sorted order is retained in the plot.
jkt$Tools <- factor(jkt$Tools, levels = jkt$Tools)

library(ggplot2)
theme_set(theme_minimal())
# Draw plot: one bar per term, height = frequency
ggplot(jkt, aes(x = Tools, y = Freq)) +
  geom_bar(stat = "identity", fill = "#FF9999", colour = "black") +
  labs(
    title = "Programming skill that are in demand in Jakarta",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

library(tm)
# Read the Tangerang text and split every line on tabs; `tn` is reused for the
# raw text, the corpus and finally the word/frequency table.
tn <- readLines("E:/BELAJAR/Scrape Indeed/tng.txt")
tn <- unlist(strsplit(tn, "\t"))

tn <- Corpus(VectorSource(tn))

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  tn <- tm_map(tn, clean_step)
}

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(tn)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
tn <- data.frame(word = names(v), freq = v)

# Keep the 10 most frequent terms
tn <- head(tn, 10)

colnames(tn) <- c("Tools", "Freq") # change column names
tn <- tn[order(tn$Freq), ] # sort ascending by frequency
# Store the ordered factor in Tools (the column the plot maps to x); writing
# it to a separate Area column left the plot order unaffected by the sort.
tn$Tools <- factor(tn$Tools, levels = tn$Tools)

Tangerang

library(ggplot2)
theme_set(theme_minimal())
# Bar chart of the Tangerang term frequencies held in `tn`
ggplot(data = tn, mapping = aes(x = Tools, y = Freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Programming skill that are in demand in Tangerang",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Read the Surabaya text and split every line on tabs; `sby` is reused for the
# raw text, the corpus and finally the word/frequency table.
sby <- readLines("E:/BELAJAR/Scrape Indeed/sby.txt")
sby <- unlist(strsplit(sby, "\t"))

sby <- Corpus(VectorSource(sby))
head(sby)

## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  sby <- tm_map(sby, clean_step)
}

# Remove stopwords manually
sby <- tm_map(sby, removeWords, c("and"))

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(sby)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
sby <- data.frame(word = names(v), freq = v)

# Keep the 10 most frequent terms
sby <- head(sby, 10)
colnames(sby) <- c("Tools", "Freq") # change column names
sby <- sby[order(sby$Freq), ] # sort ascending by frequency
# Store the ordered factor in Tools (the column the plot maps to x); writing
# it to a separate Area column left the plot order unaffected by the sort.
sby$Tools <- factor(sby$Tools, levels = sby$Tools)

Surabaya

library(ggplot2)
theme_set(theme_minimal())
# Bar chart of the Surabaya term frequencies held in `sby`
ggplot(data = sby, mapping = aes(x = Tools, y = Freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Programming skill that are in demand in Surabaya",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Read the Bandung text and split every line on tabs; `bdg` is reused for the
# raw text, the corpus and finally the word/frequency table.
bdg <- readLines("E:/BELAJAR/Scrape Indeed/bdg.txt")
bdg <- unlist(strsplit(bdg, "\t"))

bdg <- Corpus(VectorSource(bdg))
head(bdg)

## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  bdg <- tm_map(bdg, clean_step)
}

# Remove stopwords manually
bdg <- tm_map(bdg, removeWords, c("and"))

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(bdg)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
bdg <- data.frame(word = names(v), freq = v)

# Keep the 10 most frequent terms
bdg <- head(bdg, 10)
colnames(bdg) <- c("Tools", "Freq") # change column names
bdg <- bdg[order(bdg$Freq), ] # sort ascending by frequency
# Store the ordered factor in Tools (the column the plot maps to x); writing
# it to a separate Area column left the plot order unaffected by the sort.
bdg$Tools <- factor(bdg$Tools, levels = bdg$Tools)

Bandung

library(ggplot2)
theme_set(theme_minimal())
# Bar chart of the Bandung term frequencies held in `bdg`
ggplot(data = bdg, mapping = aes(x = Tools, y = Freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Programming skill that are in demand in Bandung",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Read the Yogyakarta text and split every line on tabs; `jog` is reused for
# the raw text, the corpus and finally the word/frequency table.
jog <- readLines("E:/BELAJAR/Scrape Indeed/jog.txt")
jog <- unlist(strsplit(jog, "\t"))

jog <- Corpus(VectorSource(jog))
head(jog)

## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

## Cleaning helpers: each takes a character vector and returns it cleaned
# delete anything starting with "http" up to the next whitespace
removeURL <- function(txt) gsub("http[^[:space:]]*", "", txt)
# newline -> space
removeNL <- function(txt) gsub("\n", " ", txt)
# delete commas
replacecomma <- function(txt) gsub(",", "", txt)
# delete the literal "RT "
removeRT <- function(txt) gsub("RT ", "", txt)
# delete colons
removetitik2 <- function(txt) gsub(":", "", txt)
# semicolon -> space
removetitikkoma <- function(txt) gsub(";", " ", txt)
# delete "p" followed by an ellipsis character
removetitik3 <- function(txt) gsub("p…", "", txt)
# delete the HTML entity "&amp;"
removeamp <- function(txt) gsub("&amp;", "", txt)
# delete "@" followed by word characters
removeUN <- function(txt) gsub("@\\w+", "", txt)
# delete every character that is neither a letter nor whitespace
remove.all <- function(txt) gsub("[^[:alpha:][:space:]]*", "", txt)

# Apply the steps one after another, in this exact order
for (clean_step in list(
  removeURL, removeNL, replacecomma, removeRT, removetitik2,
  removetitikkoma, removetitik3, removeamp, removeUN, remove.all,
  removePunctuation, tolower
)) {
  jog <- tm_map(jog, clean_step)
}

# Remove stopwords manually
jog <- tm_map(jog, removeWords, c("and"))

# Build a term-document matrix and turn it into a word/frequency table,
# most frequent term first
dtm <- TermDocumentMatrix(jog)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
jog <- data.frame(word = names(v), freq = v)

# Keep the 10 most frequent terms
jog <- head(jog, 10)
colnames(jog) <- c("Tools", "Freq") # change column names
jog <- jog[order(jog$Freq), ] # sort ascending by frequency
# Store the ordered factor in Tools (the column the plot maps to x); writing
# it to a separate Area column left the plot order unaffected by the sort.
jog$Tools <- factor(jog$Tools, levels = jog$Tools)

Yogyakarta

library(ggplot2)
theme_set(theme_minimal())
# Bar chart of the Yogyakarta term frequencies held in `jog`
ggplot(data = jog, mapping = aes(x = Tools, y = Freq)) +
  geom_col(fill = "#FF9999", colour = "black") +
  labs(
    title = "Programming skill that are in demand in Yogyakarta",
    caption = "source : indeed.com - May, 2019"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

Peta

# Library
library(leaflet)

# Load the per-area data (semicolon-separated); the code below reads its
# Freq, lon and lat columns.
new <- read.csv("E:/BELAJAR/Scrape Indeed/k.csv", sep = ";")

# Create a colour palette with handmade bins of width 50.
# The upper edge is at least 250 and grows when the data exceed it: colorBin()
# gives values outside the bins the na.color (transparent here), which would
# hide the markers with the highest counts.
bin_width <- 50
max_freq <- max(new$Freq, 0, na.rm = TRUE)
upper_edge <- max(250, ceiling(max_freq / bin_width) * bin_width)
mybins <- seq(0, upper_edge, by = bin_width)
mypalette <- colorBin(
  palette = "YlOrBr", domain = new$Freq,
  na.color = "transparent", bins = mybins
)

# Prepare the text for the tooltip: one HTML label per row
mytext <- lapply(paste("Job vacancy: ", new$Freq), htmltools::HTML)

# Final map: circle markers coloured by Freq on top of satellite imagery
z <- leaflet(new) %>%
  addTiles() %>%
  setView(lat = -5, lng = 115, zoom = 4.5) %>%
  addProviderTiles("Esri.WorldImagery") %>%
  addCircleMarkers(
    ~lon, ~lat,
    fillColor = ~mypalette(Freq), fillOpacity = 0.7,
    color = "white", radius = 8, stroke = FALSE,
    label = mytext,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "13px", direction = "auto"
    )
  ) %>%
  addLegend(
    pal = mypalette, values = ~Freq, opacity = 0.9,
    title = "Frequency", position = "bottomright"
  )
z

Ringkasan

Berdasarkan lowongan pekerjaan programmer yang dimuat pada web Indeed, diketahui bahwa 66% di antaranya berada di Jakarta. Hal ini tidak mengherankan karena Jakarta merupakan pusat industri dan ekonomi di wilayah Indonesia. Sementara itu, apabila dilihat dari peta, terlihat ketimpangan lowongan pekerjaan antara Pulau Jawa dan pulau lainnya. Hal ini juga mengindikasikan bahwa industri dan ekonomi masih belum merata di masing-masing daerah.
Kebutuhan pekerjaan programmer paling banyak ditemukan pada lima wilayah berikut secara berurutan, yaitu: Jakarta, Surabaya, Bandung, Tangerang, dan Yogyakarta.
Secara umum, dapat dilihat pada output wordcloud dan grafik bahwa skill pemrograman yang paling banyak dicari oleh perusahaan di Indonesia yaitu: SQL, HTML, PHP, JavaScript, dan CSS.