1. Pendahuluan

Proyek ini bertujuan untuk melakukan eksplorasi data terhadap dataset lowongan pekerjaan di bidang AI. Dataset yang digunakan merupakan file .csv dengan judul AI Job Dataset dan mencakup informasi tentang posisi pekerjaan, gaji, lokasi, pengalaman kerja, pendidikan, dan manfaat yang ditawarkan perusahaan.

Tujuan eksplorasi adalah untuk:

- Membersihkan dan menstandarkan data.
- Menganalisis distribusi dan pola dari berbagai variabel.
- Menginterpretasikan temuan yang dapat digunakan untuk pengambilan keputusan atau analisis lanjutan.

  2. Pemahaman dan Prapemrosesan Data
# Data structure
# Load the AI job dataset from a semicolon-separated CSV file, keeping text
# columns as character vectors instead of converting them to factors.
# NOTE(review): the absolute path below only resolves on a machine that has
# this file on drive D: — confirm before running elsewhere.
data <- read.csv("D:/AI_Job_Dataset.csv", sep=";", stringsAsFactors = FALSE)
# Print each column's name, type, and first few values.
str(data)
## 'data.frame':    15000 obs. of  19 variables:
##  $ job_id                : chr  "AI00001" "AI00002" "AI00003" "AI00004" ...
##  $ job_title             : chr  "AI Research Scientist" "AI Software Engineer" "AI Specialist" "NLP Engineer" ...
##  $ salary_usd            : chr  NA "61895" "152626" "80215" ...
##  $ salary_currency       : chr  "USD" "USD" "USD" "USD" ...
##  $ experience_level      : chr  "SE" "EN" "MI" "SE" ...
##  $ employment_type       : chr  "CT" "CT" "FL" "FL" ...
##  $ company_location      : chr  "China" "Canada" "Switzerland" "India" ...
##  $ company_size          : chr  "m" "M" "L" "M" ...
##  $ employee_residence    : chr  "China" "Ireland" "South Korea" "India" ...
##  $ remote_ratio          : chr  "50" "100" "0" "50" ...
##  $ required_skills       : chr  "Tableau, PyTorch, Kubernetes, Linux, NLP" "Deep Learning, AWS, Mathematics, Python, Docker" "Kubernetes, Deep Learning, Java, Hadoop, NLP" "Scala, SQL, Linux, Python" ...
##  $ education_required    : chr  "bachelor" "Master" "associate" "PhD" ...
##  $ years_experience      : int  9 1 2 7 0 7 3 0 7 5 ...
##  $ industry              : chr  "Automotive" "Media" "Education" "Consulting" ...
##  $ posting_date          : chr  "10/18/2024" "11/20/2024" "3/18/2025" "23-Dec-24" ...
##  $ application_deadline  : chr  "11/07/2024" "01/11/2025" "04/07/2025" "2/24/2025" ...
##  $ job_description_length: int  1076 1268 1974 1345 1989 819 1936 1286 551 2340 ...
##  $ benefits_score        : num  99.9 5.2 9.4 8.6 6.6 5.9 6.3 7.6 9.3 5.8 ...
##  $ company_name          : chr  "Smart Analytics" "TechCorp Inc" "Autonomous Tech" "Future Systems" ...
# Count the missing values (NA) in every column of the raw data.
colSums(is.na(data))
##                 job_id              job_title             salary_usd 
##                      0                      0                   2143 
##        salary_currency       experience_level        employment_type 
##                      0                      0                      0 
##       company_location           company_size     employee_residence 
##                      0                      0                      0 
##           remote_ratio        required_skills     education_required 
##                      0                      0                      0 
##       years_experience               industry           posting_date 
##                      0                      0                      0 
##   application_deadline job_description_length         benefits_score 
##                      0                      0                      0 
##           company_name 
##                      0
# Column cleanup
# Remove the column named X. The str() output earlier in this report lists
# no column with that name, so for this file the assignment leaves the data
# frame unchanged.
data$X <- NULL  # ID column is not used
# Re-count the missing values per column after the cleanup step.
colSums(is.na(data))
##                 job_id              job_title             salary_usd 
##                      0                      0                   2143 
##        salary_currency       experience_level        employment_type 
##                      0                      0                      0 
##       company_location           company_size     employee_residence 
##                      0                      0                      0 
##           remote_ratio        required_skills     education_required 
##                      0                      0                      0 
##       years_experience               industry           posting_date 
##                      0                      0                      0 
##   application_deadline job_description_length         benefits_score 
##                      0                      0                      0 
##           company_name 
##                      0
  3. Penanganan dan Pembersihan Data per Kolom
##  [1] "AI Research Scientist"       "AI Software Engineer"       
##  [3] "AI Specialist"               "NLP Engineer"               
##  [5] "AI Consultant"               "AI Architect"               
##  [7] "Principal Data Scientist"    "Data Analyst"               
##  [9] "Autonomous Systems Engineer" "AI Product Manager"         
## [11] "Machine Learning Engineer"   "Data Engineer"              
## [13] "Research Scientist"          "ML Ops Engineer"            
## [15] "Robotics Engineer"           "Head of AI"                 
## [17] "Deep Learning Engineer"      "Data Scientist"             
## [19] "Machine Learning Researcher" "Computer Vision Engineer"   
## [21] "computer vision engineer"
##  [1] "AI Research Scientist"       "AI Software Engineer"       
##  [3] "AI Specialist"               "NLP Engineer"               
##  [5] "AI Consultant"               "AI Architect"               
##  [7] "Principal Data Scientist"    "Data Analyst"               
##  [9] "Autonomous Systems Engineer" "AI Product Manager"         
## [11] "Machine Learning Engineer"   "Data Engineer"              
## [13] "Research Scientist"          "ML Ops Engineer"            
## [15] "Robotics Engineer"           "Head Of AI"                 
## [17] "Deep Learning Engineer"      "Data Scientist"             
## [19] "Machine Learning Researcher" "Computer Vision Engineer"
## [1] "character"

## [1] "USD" "EUR" "GBP"
## [1] "m" "M" "L" "S" "s" "l"
## [1] "50"          "100"         "0"           "0%"          "fifty%"     
## [6] "100%"        "50%"         "fifty"       "100 percent"
## [1] "50"          "100"         "0"           "fifty"       "100 percent"
## [1]  50 100   0
##  [1] "bachelor"  "Master"    "associate" "PhD"       "MASTER"    "ASSOCIATE"
##  [7] "phd"       "PHD"       "Bachelor"  "Associate" "master"    "BACHELOR"
## [1] "Bachelor"  "Master"    "Associate" "PhD"
## [1] FALSE

##   [1] "10/18/2024" "11/20/2024" "3/18/2025"  "23-Dec-24"  "15-Apr-25" 
##   [6] "8/31/2024"  "12/29/2024" "06/07/2024" "04-Nov-24"  "20-Oct-24" 
##  [11] "1/29/2025"  "16-Jul-24"  "29-Dec-24"  "02/11/2024" "02-Oct-24" 
##  [16] "9/28/2024"  "02/06/2025" "24-Apr-24"  "4/30/2024"  "01-May-24" 
##  [21] "6/21/2024"  "23-Apr-24"  "13-Mar-25"  "27-Oct-24"  "08/01/2024"
##  [26] "18-Nov-24"  "01/10/2025" "10/08/2024" "1/18/2024"  "16-May-24" 
##  [31] "1/31/2024"  "8/28/2024"  "03/01/2024" "10/26/2024" "31-Jan-24" 
##  [36] "10/17/2024" "1/14/2024"  "15-Jun-24"  "2/24/2024"  "3/17/2025" 
##  [41] "02/01/2025" "4/24/2024"  "03/03/2025" "10-Jan-24"  "11/16/2024"
##  [46] "09/05/2024" "08/08/2024" "03/10/2025" "8/16/2024"  "03/12/2024"
##  [51] "07-Jul-24"  "08/05/2024" "7/29/2024"  "28-Sep-24"  "3/22/2025" 
##  [56] "02/09/2024" "3/20/2024"  "6/22/2024"  "05-Jun-24"  "3/23/2024" 
##  [61] "20-Feb-24"  "12/15/2024" "15-Dec-24"  "01-Sep-24"  "02/09/2025"
##  [66] "30-Aug-24"  "31-Oct-24"  "14-Feb-24"  "31-May-24"  "08/02/2024"
##  [71] "11/29/2024" "04/08/2025" "19-Sep-24"  "06/10/2024" "20-Apr-24" 
##  [76] "13-Feb-25"  "31-Aug-24"  "04/01/2024" "02/05/2024" "6/20/2024" 
##  [81] "6/19/2024"  "14-Jul-24"  "16-Apr-25"  "15-Feb-25"  "20-May-24" 
##  [86] "11/19/2024" "26-Apr-25"  "10-Jul-24"  "04/10/2024" "04/05/2025"
##  [91] "12/04/2024" "01/12/2025" "4/20/2024"  "01/02/2025" "25-Jan-24" 
##  [96] "27-Jan-24"  "21-Jul-24"  "03-Mar-25"  "9/15/2024"  "1/28/2024" 
## [101] "12/13/2024" "09/04/2024" "08-Mar-24"  "5/13/2024"  "3/18/2024" 
## [106] "13-Mar-24"  "14-Aug-24"  "06/04/2024" "08/11/2024" "10/22/2024"
## [111] "15-Apr-24"  "5/18/2024"  "01/06/2025" "26-Mar-25"  "19-Apr-25" 
## [116] "12/25/2024" "04-Dec-24"  "24-Aug-24"  "18-Mar-25"  "03-Aug-24" 
## [121] "03/02/2025" "3/28/2024"  "02/03/2025" "16-Jan-25"  "08/09/2024"
## [126] "11/06/2024" "08-Aug-24"  "05-Jan-25"  "24-May-24"  "12-Apr-25" 
## [131] "9/23/2024"  "03/09/2024" "11/17/2024" "22-May-24"  "12/02/2024"
## [136] "3/30/2024"  "04-Jul-24"  "25-Oct-24"  "4/21/2024"  "23-May-24" 
## [141] "01/12/2024" "19-Mar-24"  "03/04/2025" "12-Dec-24"  "3/14/2024" 
## [146] "6/25/2024"  "05-Oct-24"  "1/30/2025"  "11-Aug-24"  "7/19/2024" 
## [151] "06-Jan-24"  "25-Aug-24"  "20-Nov-24"  "3/15/2025"  "3/13/2025" 
## [156] "01/08/2025" "09-Nov-24"  "01-Nov-24"  "11-Jul-24"  "10-Mar-24" 
## [161] "9/20/2024"  "1/28/2025"  "10/07/2024" "06/01/2024" "07/07/2024"
## [166] "02/06/2024" "30-Sep-24"  "5/23/2024"  "20-Dec-24"  "10-Jun-24" 
## [171] "10/15/2024" "02/01/2024" "7/14/2024"  "07-Aug-24"  "17-Jul-24" 
## [176] "17-Feb-25"  "2/15/2025"  "2/14/2024"  "07-Feb-25"  "25-Feb-24" 
## [181] "23-Aug-24"  "29-Mar-25"  "20-Mar-25"  "6/14/2024"  "22-Nov-24" 
## [186] "11/26/2024" "08-Oct-24"  "3/26/2025"  "29-Mar-24"  "14-Sep-24" 
## [191] "19-Mar-25"  "1/30/2024"  "4/15/2025"  "7/15/2024"  "09/08/2024"
## [196] "4/15/2024"  "1/25/2025"  "07-Feb-24"  "4/20/2025"  "02/08/2025"
## [201] "1/27/2024"  "09-Mar-24"  "4/18/2024"  "1/15/2025"  "30-Mar-25" 
## [206] "10/11/2024" "04-Feb-25"  "22-Mar-24"  "07/03/2024" "4/27/2025" 
## [211] "16-Dec-24"  "07/01/2024" "19-Feb-24"  "9/19/2024"  "02/11/2025"
## [216] "03/06/2025" "12/09/2024" "02-Feb-25"  "12-Apr-24"  "11/09/2024"
## [221] "25-Dec-24"  "9/13/2024"  "10-Oct-24"  "28-Jan-25"  "04-Mar-25" 
## [226] "1/20/2024"  "05-May-24"  "12/14/2024" "05/06/2024" "21-Jan-24" 
## [231] "8/20/2024"  "07-Dec-24"  "15-Mar-25"  "21-Dec-24"  "03/11/2024"
## [236] "1/27/2025"  "06-Jun-24"  "12/17/2024" "03-Apr-25"  "21-Mar-25" 
## [241] "26-Jun-24"  "4/18/2025"  "2/17/2025"  "03/08/2025" "27-Jun-24" 
## [246] "10/25/2024" "28-Apr-24"  "4/27/2024"  "03-Feb-25"  "8/22/2024" 
## [251] "08-Feb-25"  "2/16/2024"  "10/09/2024" "05-Feb-24"  "5/17/2024" 
## [256] "1/19/2025"  "2/23/2024"  "30-Apr-25"  "2/22/2024"  "05/05/2024"
## [261] "27-Apr-24"  "4/23/2025"  "3/14/2025"  "3/24/2024"  "04/11/2025"
## [266] "20-Jun-24"  "2/18/2025"  "24-Feb-24"  "1/22/2024"  "3/26/2024" 
## [271] "04/07/2025" "15-Jan-25"  "04-Apr-25"  "06-Mar-25"  "8/24/2024" 
## [276] "05/02/2024" "2/26/2025"  "27-Jul-24"  "10/19/2024" "8/19/2024" 
## [281] "04-Jan-24"  "09-Aug-24"  "18-Jan-25"  "7/31/2024"  "06-Apr-25" 
## [286] "12-Aug-24"  "06-Oct-24"  "12/16/2024" "13-Aug-24"  "6/23/2024" 
## [291] "03-Jul-24"  "3/21/2025"  "06-May-24"  "22-Jun-24"  "12-Jan-25" 
## [296] "08-Jan-24"  "28-Jul-24"  "15-Jan-24"  "4/14/2024"  "1/29/2024" 
## [301] "02-Jun-24"  "01-Apr-25"  "3/31/2024"  "06-Jul-24"  "10/05/2024"
## [306] "08/07/2024" "8/13/2024"  "04/09/2025" "5/16/2024"  "04/04/2024"
## [311] "12/26/2024" "8/21/2024"  "17-May-24"  "1/13/2024"  "03/07/2025"
## [316] "08/12/2024" "5/20/2024"  "8/14/2024"  "06-Jan-25"  "5/21/2024" 
## [321] "14-Jun-24"  "1/21/2024"  "5/15/2024"  "30-Mar-24"  "4/25/2025" 
## [326] "01/08/2024" "8/15/2024"  "05/01/2024" "6/17/2024"  "07/06/2024"
## [331] "05/07/2024" "22-Apr-24"  "21-Apr-25"  "13-Feb-24"  "8/23/2024" 
## [336] "10/31/2024" "10/12/2024" "03/09/2025" "1/26/2025"  "18-Feb-25" 
## [341] "05/11/2024" "1/31/2025"  "04/10/2025" "10/04/2024" "6/15/2024" 
## [346] "02-Nov-24"  "09/02/2024" "11/27/2024" "3/27/2025"  "11/13/2024"
## [351] "06/02/2024" "01/09/2024" "12-Jun-24"  "14-Apr-24"  "12-Oct-24" 
## [356] "18-Feb-24"  "11/11/2024" "02-Aug-24"  "12/10/2024" "06-Aug-24" 
## [361] "08-Apr-24"  "5/14/2024"  "22-Dec-24"  "12/12/2024" "7/25/2024" 
## [366] "04/12/2025" "01/11/2024" "3/31/2025"  "07/05/2024" "2/28/2024" 
## [371] "11/07/2024" "09-Sep-24"  "3/19/2025"  "17-Sep-24"  "14-Dec-24" 
## [376] "05/09/2024" "11/24/2024" "17-Apr-25"  "09/12/2024" "14-Apr-25" 
## [381] "01/04/2025" "07-May-24"  "06/08/2024" "3/20/2025"  "2/29/2024" 
## [386] "27-Feb-25"  "01/03/2024" "6/27/2024"  "4/29/2025"  "28-Feb-25" 
## [391] "5/26/2024"  "17-Oct-24"  "01-Feb-25"  "10-Aug-24"  "7/26/2024" 
## [396] "13-Jul-24"  "02/12/2025" "6/16/2024"  "13-Jan-25"  "10/16/2024"
## [401] "25-Apr-25"  "08-Feb-24"  "17-Jan-24"  "04/08/2024" "4/25/2024" 
## [406] "14-Nov-24"  "7/30/2024"  "10/21/2024" "03/08/2024" "18-Jan-24" 
## [411] "04-May-24"  "12/05/2024" "9/16/2024"  "08-Jul-24"  "06/11/2024"
## [416] "13-Jan-24"  "31-Dec-24"  "09/01/2024" "02/02/2024" "1/17/2024" 
## [421] "13-Dec-24"  "04-Mar-24"  "05/03/2024" "12-May-24"  "03/05/2024"
## [426] "02/10/2024" "01-Mar-24"  "19-Feb-25"  "07/08/2024" "24-Nov-24" 
## [431] "23-Sep-24"  "30-May-24"  "10/10/2024" "19-Jan-25"  "23-Nov-24" 
## [436] "26-Mar-24"  "03-Mar-24"  "2/19/2025"  "27-Nov-24"  "07-Sep-24" 
## [441] "06/12/2024" "8/17/2024"  "4/24/2025"  "5/25/2024"  "10/23/2024"
## [446] "19-Aug-24"  "14-Oct-24"  "15-Oct-24"  "10-Apr-24"  "2/24/2025" 
## [451] "20-Mar-24"  "21-Mar-24"  "12/28/2024" "29-Feb-24"  "20-Sep-24" 
## [456] "07-Apr-24"  "09/07/2024" "07-Mar-25"  "7/18/2024"  "29-Apr-24" 
## [461] "11/02/2024" "09-Apr-25"  "11/18/2024" "4/21/2025"  "26-May-24" 
## [466] "24-Dec-24"  "04/06/2024" "08/06/2024" "11/21/2024" "01/05/2024"
## [471] "11/15/2024" "2/23/2025"  "6/29/2024"  "09-Apr-24"  "12/07/2024"
## [476] "25-Jun-24"  "9/18/2024"  "12/08/2024" "10/27/2024" "09/10/2024"
## [481] "04-Apr-24"  "06/09/2024" "02-Apr-25"  "03/10/2024" "07-Jun-24" 
## [486] "19-Jul-24"  "06-Mar-24"  "06/03/2024" "02-May-24"  "6/13/2024" 
## [491] "28-Mar-25"  "11/25/2024" "07/12/2024" "04/12/2024" "09/09/2024"
## [496] "2/13/2025"  "8/26/2024"  "4/17/2025"  "20-Aug-24"  "2/14/2025" 
## [501] "10-Sep-24"  "12/22/2024" "18-Dec-24"  "3/28/2025"  "04/07/2024"
## [506] "12-Feb-24"  "30-Jan-24"  "04/01/2025" "7/20/2024"  "19-Dec-24" 
## [511] "24-Jan-25"  "10/29/2024" "03/11/2025" "11/28/2024" "4/19/2024" 
## [516] "03/05/2025" "09-Oct-24"  "04-Jan-25"  "06-Apr-24"  "2/28/2025" 
## [521] "15-Sep-24"  "01/01/2025" "17-Feb-24"  "10-Feb-25"  "26-Feb-24" 
## [526] "08/04/2024" "01/07/2024" "26-Feb-25"  "1/18/2025"  "9/27/2024" 
## [531] "23-Feb-25"  "2/25/2025"  "1/22/2025"  "05-Jan-24"  "11-Mar-24" 
## [536] "7/23/2024"  "09-Mar-25"  "12/21/2024" "01/03/2025" "22-Jan-24" 
## [541] "3/29/2025"  "1/25/2024"  "06/05/2024" "05-Dec-24"  "30-Jun-24" 
## [546] "3/27/2024"  "07-Jan-25"  "9/24/2024"  "08-Apr-25"  "02/12/2024"
## [551] "4/22/2025"  "1/16/2025"  "3/22/2024"  "06-Sep-24"  "19-Apr-24" 
## [556] "11/10/2024" "03-Jan-24"  "24-Mar-25"  "1/17/2025"  "11/03/2024"
## [561] "06-Nov-24"  "28-Apr-25"  "27-Apr-25"  "07/04/2024" "27-Feb-24" 
## [566] "3/23/2025"  "04/05/2024" "1/14/2025"  "7/17/2024"  "16-Jun-24" 
## [571] "08-Sep-24"  "01-Jul-24"  "07-Apr-25"  "18-Sep-24"  "05-Aug-24" 
## [576] "05/08/2024" "03/01/2025" "03-Jun-24"  "21-Feb-24"  "06-Feb-24" 
## [581] "25-Nov-24"  "24-Jun-24"  "08/10/2024" "29-Oct-24"  "12/20/2024"
## [586] "08-Dec-24"  "30-Jan-25"  "16-Mar-25"  "2/25/2024"  "9/14/2024" 
## [591] "02-Mar-25"  "4/13/2025"  "4/23/2024"  "11/08/2024" "03/06/2024"
## [596] "11-Mar-25"  "21-Sep-24"  "14-May-24"  "29-Apr-25"  "11/30/2024"
## [601] "21-Feb-25"  "07/02/2024" "1/19/2024"  "12/01/2024" "01-Mar-25" 
## [606] "07/11/2024" "12/11/2024" "1/23/2025"  "02/05/2025" "21-Jan-25" 
## [611] "19-Jan-24"  "25-May-24"  "29-Jun-24"  "10/06/2024" "24-Jan-24" 
## [616] "15-May-24"  "17-Dec-24"  "2/21/2025"  "17-Jun-24"  "20-Feb-25" 
## [621] "27-Sep-24"  "5/28/2024"  "15-Feb-24"  "09/03/2024" "11-Nov-24" 
## [626] "10-Nov-24"  "02/08/2024" "04-Jun-24"  "10/30/2024" "22-Apr-25" 
## [631] "10/20/2024" "4/16/2024"  "3/16/2024"  "28-Jun-24"  "11-Apr-25" 
## [636] "6/18/2024"  "16-Sep-24"  "24-Apr-25"  "09-Feb-25"  "5/29/2024" 
## [641] "04-Feb-24"  "4/14/2025"  "4/28/2024"  "04-Sep-24"  "28-Jan-24" 
## [646] "03/04/2024" "28-Nov-24"  "31-Jul-24"  "01-Aug-24"  "8/27/2024" 
## [651] "10/24/2024" "9/21/2024"  "18-Apr-24"  "04/11/2024" "26-Dec-24" 
## [656] "1/13/2025"  "24-Mar-24"  "04/04/2025" "22-Jul-24"  "22-Sep-24" 
## [661] "29-Sep-24"  "01/07/2025" "02/07/2024" "28-May-24"  "12/24/2024"
## [666] "04/09/2024" "11-Jan-25"  "02/03/2024" "11/23/2024" "12/31/2024"
## [671] "10/01/2024" "29-Aug-24"  "07-Mar-24"  "3/13/2024"  "4/26/2024" 
## [676] "8/30/2024"  "2/20/2024"  "7/16/2024"  "04/02/2024" "07-Nov-24" 
## [681] "09-Jan-25"  "28-Aug-24"  "12-Sep-24"  "4/16/2025"  "19-May-24" 
## [686] "06-Feb-25"  "10/13/2024" "6/30/2024"  "01-Dec-24"  "03/02/2024"
## [691] "11-Jun-24"  "27-Aug-24"  "01/06/2024" "21-Oct-24"  "03-Dec-24" 
## [696] "23-Apr-25"  "04/03/2024" "04/02/2025" "3/16/2025"  "1/21/2025" 
## [701] "16-Apr-24"  "1/24/2024"  "4/22/2024"  "10/14/2024" "2/17/2024" 
## [706] "09-Jun-24"  "11-Feb-24"  "03/07/2024" "12/03/2024" "17-Aug-24" 
## [711] "12/27/2024" "05/10/2024" "03-Sep-24"  "22-Jan-25"  "2/27/2025" 
## [716] "2/22/2025"  "17-Nov-24"  "01/04/2024" "9/29/2024"  "22-Oct-24" 
## [721] "7/21/2024"  "22-Feb-24"  "11/22/2024" "29-Nov-24"  "01/09/2025"
## [726] "5/19/2024"  "27-Dec-24"  "11-May-24"  "30-Jul-24"  "15-Nov-24" 
## [731] "08-Mar-25"  "3/17/2024"  "22-Feb-25"  "05-Jul-24"  "21-Apr-24" 
## [736] "04/03/2025" "8/29/2024"  "05/04/2024" "1/26/2024"  "18-Aug-24" 
## [741] "21-Aug-24"  "2/13/2024"  "23-Mar-25"  "23-Mar-24"  "13-Nov-24" 
## [746] "17-Mar-25"  "4/28/2025"  "7/22/2024"  "05/12/2024" "2/19/2024" 
## [751] "28-Feb-24"  "5/22/2024"  "24-Feb-25"  "4/17/2024"  "18-May-24" 
## [756] "26-Aug-24"  "27-May-24"  "04-Oct-24"  "9/26/2024"  "4/13/2024" 
## [761] "25-Mar-25"  "14-Jan-24"  "12/30/2024" "01-Apr-24"  "30-Dec-24" 
## [766] "4/26/2025"  "03-Jan-25"  "20-Jan-24"  "23-Oct-24"  "02/07/2025"
## [771] "4/30/2025"  "02/04/2025" "26-Oct-24"  "26-Jul-24"  "11/14/2024"
## [776] "7/28/2024"  "07/10/2024" "6/24/2024"  "5/30/2024"  "02-Dec-24" 
## [781] "15-Mar-24"  "05-Mar-25"  "13-Oct-24"  "01-Jan-24"  "21-Jun-24" 
## [786] "6/28/2024"  "11-Dec-24"  "24-Jul-24"  "26-Nov-24"  "18-Jun-24" 
## [791] "2/26/2024"  "08-Jan-25"  "08/03/2024" "08-Nov-24"  "3/19/2024" 
## [796] "16-Oct-24"  "9/22/2024"  "06/06/2024" "9/17/2024"  "3/21/2024" 
## [801] "31-Jan-25"  "05-Apr-25"  "01-Oct-24"  "2/21/2024"  "10/03/2024"
## [806] "2/27/2024"  "11/04/2024" "01/10/2024" "26-Apr-24"  "01-Jan-25" 
## [811] "02-Mar-24"  "1/15/2024"  "20-Jul-24"  "6/26/2024"  "30-Oct-24" 
## [816] "02/02/2025" "19-Jun-24"  "20-Jan-25"  "5/24/2024"  "01/02/2024"
## [821] "3/25/2025"  "12-Mar-25"  "10-Apr-25"  "7/24/2024"  "03-May-24" 
## [826] "05-Apr-24"  "12-Mar-24"  "09-Feb-24"  "23-Jul-24"  "02-Jan-25" 
## [831] "19-Nov-24"  "05-Nov-24"  "03-Feb-24"  "26-Jan-24"  "3/15/2024" 
## [836] "27-Mar-25"  "24-Oct-24"  "02/10/2025" "08-May-24"  "12-Jan-24" 
## [841] "28-Oct-24"  "2/16/2025"  "3/30/2025"  "11-Sep-24"  "26-Sep-24" 
## [846] "03-Nov-24"  "1/16/2024"  "31-Mar-24"  "3/29/2024"  "03/03/2024"
## [851] "12-Feb-25"  "18-Mar-24"  "22-Aug-24"  "23-Jan-25"  "09-Dec-24" 
## [856] "15-Aug-24"  "02/04/2024" "11/05/2024" "23-Jun-24"  "17-Apr-24" 
## [861] "01/01/2024" "09/11/2024" "10-Dec-24"  "27-Mar-24"  "12/19/2024"
## [866] "18-Apr-25"  "28-Dec-24"  "03-Apr-24"  "09/06/2024" "25-Apr-24" 
## [871] "13-Sep-24"  "28-Mar-24"  "01/11/2025" "14-Mar-24"  "02-Jul-24" 
## [876] "09-Jul-24"  "16-Nov-24"  "1/20/2025"  "05-Mar-24"  "25-Sep-24" 
## [881] "07/09/2024" "25-Mar-24"  "01-Jun-24"  "11/01/2024" "1/24/2025" 
## [886] "12-Nov-24"  "10-May-24"  "03-Oct-24"  "31-Mar-25"  "1/23/2024" 
## [891] "9/30/2024"  "13-Jun-24"  "17-Mar-24"  "10-Feb-24"  "08-Jun-24" 
## [896] "22-Mar-25"  "25-Jul-24"  "10-Mar-25"  "27-Jan-25"  "11/12/2024"
## [901] "16-Aug-24"  "10/28/2024" "10/02/2024" "9/25/2024"  "2/15/2024" 
## [906] "01-Feb-24"  "02-Apr-24"  "12/23/2024" "25-Feb-25"  "12-Jul-24" 
## [911] "29-Jan-25"  "13-May-24"  "02-Feb-24"  "7/27/2024"  "2/20/2025" 
## [916] "15-Jul-24"  "05-Feb-25"  "2/18/2024"  "17-Jan-25"  "19-Oct-24" 
## [921] "05-Sep-24"  "4/29/2024"  "16-Jan-24"  "14-Jan-25"  "11-Jan-24" 
## [926] "24-Sep-24"  "29-Jan-24"  "4/19/2025"  "07-Oct-24"  "14-Feb-25" 
## [931] "11-Feb-25"  "25-Jan-25"  "14-Mar-25"  "10-Jan-25"  "26-Jan-25" 
## [936] "13-Apr-25"  "30-Apr-24"  "5/27/2024"  "29-Jul-24"  "8/18/2024" 
## [941] "09-May-24"  "02-Sep-24"  "3/25/2024"  "29-May-24"  "5/31/2024" 
## [946] "01/05/2025" "23-Feb-24"  "8/25/2024"  "03/12/2025" "09-Jan-24" 
## [951] "12/18/2024" "11-Oct-24"  "12/06/2024" "3/24/2025"  "7/13/2024" 
## [956] "13-Apr-24"  "11-Apr-24"  "02-Jan-24"  "06-Dec-24"  "07-Jan-24" 
## [961] "16-Feb-24"  "30-Nov-24"  "23-Jan-24"  "18-Jul-24"  "20-Apr-25" 
## [966] "04/06/2025" "16-Feb-25"  "04-Aug-24"  "21-May-24"  "16-Mar-24" 
## [971] "21-Nov-24"  "18-Oct-24"
##  Date[1:15000], format: "2024-10-18" "2024-11-20" "2025-03-18" "2024-12-23" "2025-04-15" ...
##  Date[1:15000], format: "2024-11-07" "2025-01-11" "2025-04-07" "2025-02-24" "2025-06-23" ...
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00    6.30    7.60   12.12    9.00   99.90

## # A tibble: 6 × 3
##   job_id  job_title             benefits_score
##   <chr>   <chr>                          <dbl>
## 1 AI00001 AI Research Scientist           99.9
## 2 AI00021 Data Engineer                   99.9
## 3 AI00041 Data Scientist                  99.9
## 4 AI00061 Data Scientist                  99.9
## 5 AI00081 Data Analyst                    99.9
## 6 AI00101 AI Product Manager              99.9
## [1] 750
## [1] 14250
## [1] 0
## [1] 750
## [1] 15000
##   Rentang_Nilai Jumlah_Data Persentase
## 1        0 – 10       14250         95
## 2     10 – 99.9           0          0
## 3          99.9         750          5
## [1] 130
## [1] 14250
##  [1] "TechCorp Inc"               "Autonomous Tech"           
##  [3] "Future Systems"             "Advanced Robotics"         
##  [5] "Neural Networks Co"         "DataVision Ltd"            
##  [7] "Cloud AI Solutions"         "Quantum Computing Inc"     
##  [9] "Smart Analytics"            "Predictive Systems"        
## [11] "AI Innovations"             "Algorithmic Solutions"     
## [13] "Cognitive Computing"        "DeepTech Ventures"         
## [15] "Machine Intelligence Group" "Digital Transformation LLC"
## 
##               TechCorp Inc        Cognitive Computing 
##                        929                        926 
##             AI Innovations Digital Transformation LLC 
##                        915                        915 
##             Future Systems      Quantum Computing Inc 
##                        913                        907 
##         Cloud AI Solutions         Predictive Systems 
##                        903                        901 
##          Advanced Robotics            Smart Analytics 
##                        887                        881 
##            Autonomous Tech Machine Intelligence Group 
##                        873                        871 
##         Neural Networks Co          DeepTech Ventures 
##                        868                        856 
##             DataVision Ltd      Algorithmic Solutions 
##                        855                        850
  4. EDA dan Visualisasi
# --- 1. Top 10 Job Titles ---
# Count postings per job title (sorted by descending count) and keep the ten
# most frequent titles. with_ties = FALSE caps the result at ten rows, so a
# tie at rank 10 cannot produce more bars than there are colours.
top10_jobs <- data %>%
  count(job_title, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

# Colour gradient from dark purple to light pink, one colour per row.
# The palette length follows the data, so it also works when the dataset
# contains fewer than ten distinct job titles.
gradasi_ungu <- colorRampPalette(c("#4B2E59", "#E1BEE7"))(nrow(top10_jobs))

# Horizontal bar chart. fct_reorder() orders the y axis by count; the fill
# vector is applied in row order, so the most frequent title gets the
# darkest colour.
p_top10 <- ggplot(top10_jobs, aes(x = n, y = fct_reorder(job_title, n))) +
  geom_col(fill = gradasi_ungu) +
  labs(title = "Top 10 Job Titles", x = "Jumlah Lowongan Kerja", y = NULL) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold", size = 16, color = "#D6A8D9"),
    plot.background = element_rect(fill = NA, color = NA),
    panel.background = element_rect(fill = NA)
  )

# Make sure the output directory exists, then save as a transparent PNG.
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave("output_plot/top10_job_title_gradient.png", plot = p_top10, width = 7, height = 5, bg = "transparent")

# Show the plot in the viewer
print(p_top10)

Insight dari Bar Chart Tersebut

  1. Posisi yang paling banyak dicari adalah Machine Learning Researcher (768 lowongan).
  2. Posisi teknis seperti AI Engineer dan NLP Engineer sangat dibutuhkan.
  3. Head of AI dan Data Analyst juga penting meskipun bukan posisi teknis murni.
  4. Perbedaan jumlah lowongan antarposisi tidak jauh, sehingga semua posisi memiliki peluang yang besar.
# --- 2. Top 5 Company Locations (Pie Chart) ---
# Five company locations with the most postings. `prop` is each location's
# share of the postings within these five locations only, not of the whole
# dataset. with_ties = FALSE keeps the result at five rows even when counts
# are tied at rank 5.
top_lokasi <- data %>%
  count(company_location, sort = TRUE) %>%
  slice_max(n, n = 5, with_ties = FALSE) %>%
  mutate(prop = n / sum(n)) %>%
  arrange(desc(prop))

# Gradient from dark to light purple, named by location so that
# scale_fill_manual() matches colours by name (largest share = darkest).
fill_colors <- colorRampPalette(c("#2C005B", "#E9D5FF"))(nrow(top_lokasi))
names(fill_colors) <- top_lokasi$company_location

# Pie chart with the percentage printed inside each slice and plain location
# names in the legend. The labels use position_stack(vjust = 0.5) with the
# same grouping as the bars, so every percentage is centred on its own slice
# regardless of the order in which ggplot2 stacks the locations.
p <- ggplot(top_lokasi, aes(x = "", y = prop, fill = company_location)) +
  geom_col(width = 1, color = "black", alpha = 0.85) +
  coord_polar(theta = "y") +
  geom_text(aes(label = paste0(round(prop * 100, 1), "%"),
                group = company_location),
            position = position_stack(vjust = 0.5),
            color = "black", size = 4) +
  scale_fill_manual(values = fill_colors) +
  labs(title = "Top 5 Company Locations for AI Jobs") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14, color = "black"),
    legend.position = "right",
    legend.text = element_text(size = 10, color = "grey2"),
    legend.title = element_blank(),
    legend.background = element_rect(fill = "transparent", color = NA),
    legend.key = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA)
  )

# Make sure the output directory exists, then save the plot.
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave("output_plot/pie_company_location_final_nopct_legend.png", plot = p,
       width = 8, height = 6, dpi = 300, bg = "transparent")
# Show the plot in the viewer
print(p)

Insight dari pie chart tersebut :

Distribusi lokasi perusahaan AI di lima negara teratas sangat merata, dengan Austria memimpin tipis (20,7%) yang kemudian disusul empat negara lain. Persentase ini dihitung terhadap total lowongan di lima negara teratas saja, bukan terhadap seluruh dataset. Ini menunjukkan bahwa peluang karier AI tersebar luas di berbagai negara, tidak hanya terpusat di satu lokasi dominan.

# --- 3. experience_level ---
# Lookup table: raw experience code -> descriptive label used on the axis
experience_mapping <- c(
  EN = "EN (Entry)",
  MI = "MI (Mid)",
  SE = "SE (Senior)",
  EX = "EX (Expert)"
)

# One row per experience level: posting count plus its display label
exp_plot_data <- mutate(
  count(data, experience_level),
  experience_label = experience_mapping[experience_level]
)

# Fill colour for each display label
experience_fill <- c(
  "EN (Entry)" = "#E1BEE7",
  "MI (Mid)" = "#AB47BC",
  "SE (Senior)" = "#CE93D8",
  "EX (Expert)" = "#6A1B9A"
)

# Theme overrides applied on top of theme_minimal(): centred bold title,
# no grid, no legend, transparent backgrounds, and generous margins
experience_theme <- theme(
  plot.title = element_text(color = "black", face = "bold", size = 16, hjust = 0.5),
  axis.text = element_text(color = "grey20", size = 12),
  axis.title = element_text(color = "grey20", size = 13),
  plot.caption = element_text(color = "grey40", size = 10, hjust = 0),
  legend.position = "none",
  panel.grid = element_blank(),
  plot.background = element_rect(fill = "transparent", color = NA),
  panel.background = element_rect(fill = "transparent", color = NA),
  plot.title.position = "plot",
  plot.margin = margin(t = 60, r = 30, b = 30, l = 30)
)

# Bar chart of postings per experience level with the count above each bar
p_experience <- ggplot(
  exp_plot_data,
  aes(x = experience_label, y = n, fill = experience_label)
) +
  geom_col(width = 0.7) +
  geom_text(aes(label = n), vjust = -0.5, color = "black", size = 5) +
  scale_fill_manual(values = experience_fill) +
  # leave 10% headroom above the tallest bar for the count labels
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Distribusi Tingkat Pengalaman",
    x = "Experience Level",
    y = "Jumlah Lowongan Pekerjaan",
    caption = "Keterangan: EN = Entry | MI = Mid | SE = Senior | EX = Expert"
  ) +
  theme_minimal(base_family = "sans") +
  experience_theme

# Save as a transparent PNG in the working directory
ggsave(
  filename = "plot_experience.png",
  plot = p_experience,
  width = 10,
  height = 7,
  dpi = 300,
  bg = "transparent"
)
# Show the plot
print(p_experience)

Insight dari Bar chart tersebut :

Lowongan AI tersedia secara merata untuk semua level pengalaman, dari Entry hingga Expert. Artinya, baik pemula maupun profesional berpengalaman memiliki peluang besar untuk masuk ke industri ini.

# --- 4. education_required ---
# Postings per education level, sorted from most to least frequent. The
# level is turned into a factor ordered by descending count, and each row
# gets one colour from a dark-to-light purple gradient.
edu_dist <- data %>%
  count(education_required) %>%
  arrange(desc(n)) %>%
  mutate(
    education_required = reorder(education_required, -n),
    fill_color = colorRampPalette(c("#4B0082", "#D8BFD8"))(n())
  )

# Theme overrides: no grid, transparent backgrounds, no legend
edu_theme <- theme(
  panel.grid = element_blank(),
  plot.background = element_rect(fill = "transparent", color = NA),
  panel.background = element_rect(fill = "transparent", color = NA),
  axis.text = element_text(color = "grey2"),
  axis.title = element_text(color = "grey2"),
  plot.title = element_text(face = "bold", size = 14, color = "black"),
  legend.position = "none"
)

# Bar chart; scale_fill_identity() uses the hex codes stored in fill_color
gg_edu <- ggplot(
  edu_dist,
  aes(x = education_required, y = n, fill = fill_color)
) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 4, color = "grey") +
  scale_fill_identity() +
  labs(
    title = "Distribusi Pendidikan yang Dibutuhkan",
    x = "Tingkat Pendidikan",
    y = "Jumlah Lowongan Pekerjaan"
  ) +
  theme_minimal(base_family = "sans") +
  edu_theme

# Save the chart as a transparent PNG
ggsave(
  filename = "output_plot/grafik_education_required_cleaned.png",
  plot = gg_edu,
  width = 8,
  height = 6,
  bg = "transparent"
)

# Show the plot in the viewer
print(gg_edu)

Insight dari Bar chart tersebut :

Lowongan pekerjaan di bidang AI tidak mensyaratkan gelar tinggi secara mutlak. Bahkan lulusan Associate dan Bachelor memiliki jumlah lowongan yang setara atau lebih banyak dibanding Master dan PhD. Artinya, kesempatan masuk ke dunia AI terbuka luas bagi semua jenjang pendidikan.

# --- 5. Company size distribution ---
# 1. Normalise the size code (trim whitespace, upper-case) and convert it to
#    a factor ordered S, M, L. The NA filter runs AFTER the conversion, so
#    values that are missing or not one of S/M/L are dropped instead of
#    showing up as an extra NA bar.
data_company <- data %>%
  mutate(
    company_size = factor(
      toupper(trimws(as.character(company_size))),
      levels = c("S", "M", "L")
    )
  ) %>%
  filter(!is.na(company_size))


# 2. Count the postings in each size category
company_size_plot <- data_company %>%
  count(company_size)

# 3. Bar chart with the count printed above each bar. clip = "off" lets the
#    labels extend past the panel, and the top margin leaves room for them.
p_company_size <- ggplot(company_size_plot, aes(x = company_size, y = n, fill = company_size)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = n), vjust = -0.5, color = "grey2", size = 5) +
  scale_fill_manual(values = c("S" = "#CE93D8", "M" = "#9C27B0", "L" = "#E1BEE7")) +
  labs(
    title = "Distribusi Ukuran Perusahaan",
    x = "Ukuran (S, M, L)",
    y = "Jumlah"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(color = "black", face = "bold", size = 16),
    axis.text = element_text(color = "grey2", size = 12),
    axis.title = element_text(color = "black"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    legend.position = "none",
    plot.margin = margin(t = 40, r = 10, b = 10, l = 10)
  )+
  coord_cartesian(clip = "off")


# 4. Make sure the output directory exists, then save
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave(
  filename = "output_plot/distribusi_company_size.png",
  plot = p_company_size,
  width = 8, height = 6,
  dpi = 300,
  bg = "transparent"
)

# 5. Show the plot
print(p_company_size)

Insight dari Bar chart tersebut :

Lowongan pekerjaan AI tersebar merata di semua ukuran perusahaan, baik kecil (S), menengah (M), maupun besar (L). Ini menunjukkan bahwa peluang karier di bidang AI tidak hanya terbatas pada perusahaan besar, tetapi juga terbuka luas di perusahaan kecil dan menengah.

# --- 6. Bar chart: top 10 job titles by average salary ---
# 1. Average salary per job title, keeping the ten highest averages.
#    This reuses the name top10_jobs, replacing the frequency table built
#    for the first chart.
# NOTE(review): the str() output earlier in this report lists salary_usd as
# character; mean() needs a numeric column, so this presumably relies on a
# conversion step that is not shown here — confirm.
top10_jobs <- data %>%
  group_by(job_title) %>%
  summarise(mean_salary = mean(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(mean_salary)) %>%
  slice_head(n = 10)

# 2. Attach the dominant (most frequent) industry of each of those titles.
#    with_ties = FALSE keeps exactly one industry per title; with ties kept,
#    a title would appear on several rows and geom_col() would stack them,
#    drawing a bar several times longer than the actual mean salary.
top10_with_industry <- data %>%
  filter(job_title %in% top10_jobs$job_title) %>%
  count(job_title, industry) %>%
  group_by(job_title) %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(job_title, industry) %>%
  left_join(top10_jobs, by = "job_title")

# 3. Purple gradient with one colour per distinct dominant industry
purple_gradient <- colorRampPalette(c("#4B0082", "#D8BFD8"))(length(unique(top10_with_industry$industry)))

# 4. Horizontal bar chart. clip = "off" plus 20% extra space on the value
#    axis keeps the salary labels from being cut off.
p_top10_clean <- ggplot(top10_with_industry, aes(x = reorder(job_title, mean_salary),
                                                 y = mean_salary, fill = industry)) +
  geom_col(width = 0.7) +
  coord_flip(clip = "off") +
  geom_text(aes(label = paste0("$", round(mean_salary))), 
            hjust = -0.1, color = "grey", size = 4) +
  scale_fill_manual(values = purple_gradient) +
  labs(
    title = "Top 10 Pekerjaan dengan Gaji Tertinggi",
    subtitle = "Warna berdasarkan industri dominan",
    x = "Job Title", y = "Rata-rata Gaji (USD)", fill = "Industry"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    panel.grid = element_blank(),
    axis.text = element_text(color = "grey2"),
    axis.title = element_text(color = "grey"),
    plot.title = element_text(color = "black", face = "bold", size = 16),
    plot.subtitle = element_text(color = "purple", size = 12),
    legend.title = element_text(color = "grey2"),
    legend.text = element_text(color = "grey"),
    legend.position = "right"
  )

# 5. Save as a transparent PNG in the working directory
ggsave("top10_job_salary_colored_by_industry.png",
       plot = p_top10_clean,
       width = 14, height = 7, dpi = 300, bg = "transparent")

# Show the plot
print(p_top10_clean)

Insight dari Bar chart tersebut :

Profesi berbasis AI mendominasi posisi dengan gaji tertinggi, seperti AI Specialist, Machine Learning Engineer, dan Head of AI. Ini mencerminkan permintaan tinggi terhadap keahlian AI di berbagai industri. Peran yang memadukan sisi teknis dan strategis, seperti AI Consultant dan Principal Data Scientist, juga masuk 10 besar, yang menandakan bahwa kombinasi kemampuan teknis dan strategis sangat dihargai di pasar kerja saat ini.

# --- 7. Heatmap: Job Title x Industry ---
# Compact heatmap of mean salary by job title and industry.
# Problem (per the original note): there are 15 industries, so a full heatmap
# is too dense to read.
# Approach: keep the 5 industries with the highest median salary, then the
# 10 job titles with the highest mean salary within those industries.
library(dplyr)
library(ggplot2)

# The 5 industries with the highest median salary
top_industri <- data %>%
  group_by(industry) %>%
  summarise(median_salary = median(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(median_salary)) %>%
  slice(1:5) %>%
  pull(industry)

# The 10 job titles with the highest mean salary inside those industries
top_jobs <- data %>%
  filter(industry %in% top_industri) %>%
  group_by(job_title) %>%
  summarise(avg_salary = mean(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(avg_salary)) %>%
  slice(1:10) %>%
  pull(job_title)

# Mean salary for every (industry, job title) cell of the heatmap.
# .groups = "drop" returns an ungrouped result directly; this avoids the
# regrouping message and matches the other two-variable summaries in the file.
heatmap_data <- data %>%
  filter(industry %in% top_industri, job_title %in% top_jobs) %>%
  group_by(industry, job_title) %>%
  summarise(avg_salary = mean(salary_usd, na.rm = TRUE), .groups = "drop")

# Build the heatmap and keep it in a variable
p_heatmap <- ggplot(
  heatmap_data,
  aes(
    x = industry,
    y = reorder(job_title, avg_salary),  # rows sorted by mean cell value
    fill = avg_salary
  )
) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(avg_salary, 0)), size = 3, color = "white") +
  scale_fill_gradientn(
    colours = c("#f2e5ff", "#b983ff", "#6a00ff"),
    name = "Gaji Rata-rata (USD)"
  ) +
  labs(
    title = "Top 10 Gaji AI: Job Title vs Industri",
    x = "Industri", y = "Posisi Pekerjaan"
  ) +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA),
    legend.key = element_rect(fill = "transparent", color = NA),
    axis.text = element_text(color = "grey2"),
    axis.title = element_text(color = "purple"),
    plot.title = element_text(color = "purple3", face = "bold", size = 14, hjust = 0.5),
    legend.title = element_text(color = "grey2"),
    legend.text = element_text(color = "grey"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Save with a transparent background
ggsave("heatmap_top10_salary_by_industry1.png",
       plot = p_heatmap,
       width = 10, height = 6, dpi = 300, bg = "transparent")

# Display the plot
print(p_heatmap)

Insight dari Heatmap tersebut :

Gaji tertinggi dicapai oleh ML Ops Engineer di industri Education (USD 138K). Industri seperti Software, Finance, dan Media punya rentang kompensasi paling kompetitif. Profesi yang sama bisa berbeda gajinya tergantung industrinya.

# --- 8. Stacked column chart: work arrangement per job title ---
# Map remote_ratio (0 / 50 / 100) to a readable category. Any other value
# becomes NA and the row is dropped. Note that this overwrites `data`, so the
# dropped rows are also gone for everything that runs after this section.
data <- data %>%
  mutate(remote_category = case_when(
    remote_ratio == 0 ~ "Onsite",
    remote_ratio == 50 ~ "Hybrid",
    remote_ratio == 100 ~ "Remote",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(remote_category))

# Number of postings per (job_title, remote_category) combination
remote_dist_filtered <- data %>%
  group_by(job_title, remote_category) %>%
  summarise(Jumlah = n(), .groups = "drop")

# The 10 job titles with the most postings overall
top_10_jobtitle <- remote_dist_filtered %>%
  group_by(job_title) %>%
  summarise(total = sum(Jumlah)) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10) %>%
  pull(job_title)

# Keep only those top-10 job titles
remote_dist_filtered <- remote_dist_filtered %>%
  filter(job_title %in% top_10_jobtitle)

# Order the bars by the height of the whole stack (sum over categories).
# reorder() defaults to the mean, which only matches the stack height when
# every job title has the same number of categories.
p_gayakerja <- ggplot(
  remote_dist_filtered,
  aes(x = reorder(job_title, -Jumlah, FUN = sum), y = Jumlah, fill = remote_category)
) +
  geom_bar(stat = "identity") +
  # Count label centred inside each stacked segment
  geom_text(aes(label = Jumlah),
            position = position_stack(vjust = 0.5),
            size = 3,
            color = "white") +
  scale_fill_manual(
    values = c(
      "Remote" = "#8E44AD",   # dark purple
      "Hybrid" = "#BB8FCE",   # light purple
      "Onsite" = "#D2B4DE"    # very light purple
    )
  ) +
  labs(
    title = "Jenis Gaya Kerja Berdasarkan Pekerjaan",
    subtitle = "Top 10 Pekerjaan dengan Distribusi Remote, Hybrid, dan Onsite",
    x = "Job Title",
    y = "Jumlah Pekerjaan",
    fill = "Gaya Kerja"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent"),
    legend.box.background = element_rect(fill = "transparent"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", size = 16, color = "#6C3483"),
    plot.subtitle = element_text(size = 12, color = "#A569BD")
  )

# Save as a transparent PNG. The plot is passed explicitly so the file does
# not depend on whatever ggplot2 currently considers the "last plot".
ggsave("visual-remote-distribusi.png",
       plot = p_gayakerja,
       width = 12, height = 6.5, dpi = 300, bg = "transparent")

# Display the plot
print(p_gayakerja)

Insight dari Stacked bar chart tersebut :

Sebagian besar pekerjaan di bidang AI memungkinkan kerja remote, terutama untuk posisi seperti Data Engineer, Machine Learning Engineer, dan Software Engineer, yang punya jumlah lowongan remote tertinggi. Ini menunjukkan bahwa fleksibilitas kerja di bidang teknologi dan AI tergolong tinggi.

# --- 9. Boxplot: salary by work arrangement ---
# Derive the work-arrangement category from remote_ratio, drop rows that match
# none of the three known values, and store the result as a factor whose
# levels run Onsite -> Hybrid -> Remote (this fixes the order on the x axis).
data <- data %>%
  mutate(
    remote_category = case_when(
      remote_ratio == 0 ~ "Onsite",
      remote_ratio == 50 ~ "Hybrid",
      remote_ratio == 100 ~ "Remote",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(remote_category)) %>%
  mutate(
    remote_category = factor(
      remote_category,
      levels = c("Onsite", "Hybrid", "Remote")
    )
  )

# One box per category; the legend is hidden because the x axis already
# names the categories.
boxplot_gaji_gayakerja <- ggplot(
  data,
  aes(x = remote_category, y = salary_usd, fill = remote_category)
) +
  geom_boxplot(
    width = 0.6,
    color = "black",
    outlier.size = 1,
    outlier.alpha = 0.15
  ) +
  scale_fill_manual(
    values = c(
      "Remote" = "#8E44AD",
      "Hybrid" = "#A569BD",
      "Onsite" = "#D2B4DE"
    )
  ) +
  # No padding below the data, 10% above
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Perbandingan Gaji Berdasarkan Gaya Kerja",
    subtitle = "Remote ternyata tetap punya rentang gaji kompetitif",
    x = "Gaya Kerja",
    y = "Gaji (USD)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.margin = margin(t = 20, r = 10, b = 10, l = 10),
    plot.title = element_text(face = "bold", size = 16, color = "#6C3483", hjust = 0.5),
    plot.subtitle = element_text(size = 12, color = "#A569BD", hjust = 0.5),
    axis.title.x = element_text(size = 13),
    axis.title.y = element_text(size = 13),
    axis.text = element_text(size = 11)
  )

# Save the plot (white background for this one)
ggsave(
  "boxplot-salary-remote-fixed-axisfix.png",
  plot = boxplot_gaji_gayakerja,
  width = 8.5,
  height = 6,
  dpi = 300,
  bg = "white"
)

# Display the plot
print(boxplot_gaji_gayakerja)

Insight dari Box plot tersebut :

Gaya kerja tidak menentukan gaji secara signifikan di bidang AI. Bahkan, pekerjaan remote tetap menawarkan gaji kompetitif dengan potensi median yang sedikit lebih tinggi.

# --- 10. Word cloud: most frequently requested skills (all postings) ---
# Split the comma-separated skill list into one row per skill, trim the
# surrounding whitespace and count how often each skill occurs.
# Missing and empty tokens (e.g. from an NA cell or a trailing comma) are
# removed so they are not counted or drawn as if they were skills.
skill_words <- data %>%
  select(required_skills) %>%
  separate_rows(required_skills, sep = ",") %>%
  mutate(required_skills = str_trim(required_skills)) %>%
  filter(!is.na(required_skills), required_skills != "") %>%
  count(required_skills, sort = TRUE)

# Word cloud over all postings; the seed fixes the random word placement
set.seed(123)

# Draw on the active device (shown in the report)
wordcloud(words = skill_words$required_skills,
          freq = skill_words$n,
          min.freq = 2,
          scale = c(4, 0.5),
          colors = brewer.pal(8, "BuPu"))

# Draw the same cloud again into a PNG file
png("wordcloud_umum.png", width = 800, height = 600, bg = "white")
wordcloud(words = skill_words$required_skills,
          freq = skill_words$n,
          min.freq = 2,
          scale = c(4, 0.5),
          colors = brewer.pal(8, "BuPu"))
dev.off()
## png 
##   2

Insight dari Wordcloud tersebut :

Python dan SQL adalah fondasi utama dalam dunia AI, keduanya paling sering diminta dan wajib dipelajari oleh siapa pun yang ingin masuk bidang ini. Di sisi lain, skill lanjutan seperti Kubernetes, TensorFlow, dan PyTorch juga makin dicari, menandakan bahwa penguasaan tools teknis dan framework machine learning/deployment makin krusial.

# --- 11. Bar chart: the 10 most requested skills ---
# One row per (posting, skill): rows without a skill list are dropped, the
# list is split on a comma followed by optional whitespace, and any
# remaining surrounding whitespace is trimmed.
skill_df <- data %>%
  filter(!is.na(required_skills)) %>%
  separate_rows(required_skills, sep = ",\\s*") %>%
  mutate(required_skills = str_trim(required_skills))

# Frequency per skill, reduced to the 10 highest counts. The skill column is
# then turned into a factor ordered by frequency so ggplot does not fall back
# to alphabetical order.
top_skill <- skill_df %>%
  group_by(required_skills) %>%
  summarise(freq = n()) %>%
  arrange(desc(freq)) %>%
  slice_max(order_by = freq, n = 10) %>%
  mutate(required_skills = fct_reorder(required_skills, freq))

# Horizontal bar chart (not assigned, so it is printed directly)
ggplot(top_skill, aes(x = required_skills, y = freq)) +
  geom_col(fill = "#A569BD") +
  coord_flip() +
  labs(
    title = "📈 10 Skill Paling Banyak Diminta",
    subtitle = "Python dan SQL tetap raja. LLM naik daun. Skill visualisasi & tools tetap relevan.",
    x = "Skill",
    y = "Jumlah Kemunculan"
  ) +
  theme_minimal(base_size = 13)

Insight dari Bar chart tersebut :

Python dan SQL adalah fondasi utama dalam dunia AI, keduanya paling sering diminta dan wajib dipelajari oleh siapa pun yang ingin masuk bidang ini. Di sisi lain, skill lanjutan seperti Kubernetes, TensorFlow, dan PyTorch juga makin dicari, menandakan bahwa penguasaan tools teknis dan framework machine learning/deployment makin krusial.

# --- 12. Word cloud: skills within the top-5 groups by mean salary ---
# NOTE(review): the grouping column is company_size, while the section heading
# refers to the five best-paid jobs and the original comments refer to
# companies. If company_size only holds a handful of size categories, taking
# the "top 5" keeps every row and this cloud repeats the general one.
# TODO confirm which column was intended.

# 1. Mean salary per company_size value, highest first
mean_salary_company <- data %>%
  group_by(company_size) %>%
  summarise(mean_salary = mean(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(mean_salary))

# 2. The (up to) five groups with the highest mean salary
top5_companies <- head(pull(mean_salary_company, company_size), 5)

# 3. Postings that belong to those groups
top5_data <- data %>%
  filter(company_size %in% top5_companies)

# 4. One row per skill, trimmed, without empty/missing tokens, then counted
top5_skill_words <- top5_data %>%
  select(required_skills) %>%
  separate_rows(required_skills, sep = ",") %>%
  mutate(required_skills = str_trim(required_skills)) %>%
  filter(required_skills != "", !is.na(required_skills)) %>%
  count(required_skills, sort = TRUE)

# 5. Draw the cloud on the active device, then once more into a PNG file.
#    The seed is set once, before the first draw, so the second draw continues
#    from the random state the first one left behind.
draw_top5_wordcloud <- function() {
  wordcloud(words = top5_skill_words$required_skills,
            freq = top5_skill_words$n,
            min.freq = 1,
            scale = c(4, 0.5),
            colors = brewer.pal(8, "PuOr"))
}

set.seed(123)
draw_top5_wordcloud()

png("wordcloud_top5_companies.png", width = 800, height = 600, bg = "white")
draw_top5_wordcloud()
dev.off()
## png 
##   2

Insight dari wordcloud tersebut :

Skill teknikal seperti Python, SQL, Kubernetes, TensorFlow, dan PyTorch mendominasi pekerjaan bergaji tertinggi di dunia AI.

# --- 13. Stacked column chart: job titles by required education ---
# Number of postings per (job title, education level), restricted to the four
# education levels that have a colour in the chart below.
data_filtered <- data %>%
  filter(education_required %in% c("Associate", "Bachelor", "Master", "PhD")) %>%
  group_by(job_title, education_required) %>%
  summarise(jumlah = n(), .groups = "drop")

# The 5 job titles with the most postings across all education levels
top_jobs <- data_filtered %>%
  count(job_title, wt = jumlah, name = "total", sort = TRUE) %>%
  slice_max(order_by = total, n = 5) %>%
  pull(job_title)

data_top <- data_filtered %>%
  filter(job_title %in% top_jobs)

# Bars are ordered by total stack height (largest first); each stack is
# split by education level.
barchart_edu <- ggplot(
  data_top,
  aes(
    x = fct_reorder(job_title, -jumlah, .fun = sum),
    y = jumlah,
    fill = education_required
  )
) +
  geom_bar(stat = "identity", width = 0.7) +
  scale_fill_manual(
    values = c(
      "Associate" = "#f4ecf7",
      "Bachelor"  = "#d2b4de",
      "Master"    = "#a569bd",
      "PhD"       = "#6c3483"
    )
  ) +
  coord_cartesian(clip = "off") +
  labs(
    title = "Pekerjaan Berdasarkan Tingkat Pendidikan (Top 5)",
    subtitle = "Perbandingan lulusan Associate, S1, S2, dan S3",
    x = NULL,
    y = "Jumlah Pekerja",
    fill = "Tingkat Pendidikan"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    # Transparent canvas without grid lines, generous outer margin
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(t = 40, r = 30, b = 40, l = 30),
    # Titles
    plot.title = element_text(face = "bold", size = 20, color = "purple4", hjust = 0.5),
    plot.subtitle = element_text(size = 14, margin = margin(b = 15), color = "grey30", hjust = 0.5),
    # Axes
    axis.title = element_text(color = "grey30"),
    axis.text.x = element_text(size = 13, angle = 15, hjust = 1, color = "grey30"),
    axis.text.y = element_text(size = 13, color = "grey30"),
    # Legend
    legend.position = "top",
    legend.title = element_text(color = "#d2b4de"),
    legend.text = element_text(color = "grey2")
  )

# Save as a PNG with a transparent background
ggsave(
  filename = "stacked_barchart_jobtitle_top5_clean.png",
  plot = barchart_edu,
  width = 10,
  height = 7,
  dpi = 300,
  bg = "transparent"
)

# Display the plot
print(barchart_edu)

Insight dari Stacked bar chart tersebut :

Lulusan S1 punya banyak peluang di dunia AI, terutama untuk posisi seperti AI Software Engineer dan Machine Learning Engineer. Sementara itu, lulusan S2 dan S3 (Master & PhD) lebih banyak ditemukan pada role yang fokus pada riset dan arsitektur sistem seperti AI Researcher atau AI Architect.