Tujuan eksplorasi adalah untuk:
- Membersihkan dan menstandarkan data.
- Menganalisis distribusi dan pola dari berbagai variabel.
- Menginterpretasikan temuan yang dapat digunakan untuk pengambilan keputusan atau analisis lanjutan.
# Data structure
# Read the semicolon-delimited job postings file, keeping text columns as
# character vectors instead of factors, then print the column types.
data <- read.csv(
  file = "D:/AI_Job_Dataset.csv",
  sep = ";",
  stringsAsFactors = FALSE
)
str(data)
## 'data.frame': 15000 obs. of 19 variables:
## $ job_id : chr "AI00001" "AI00002" "AI00003" "AI00004" ...
## $ job_title : chr "AI Research Scientist" "AI Software Engineer" "AI Specialist" "NLP Engineer" ...
## $ salary_usd : chr NA "61895" "152626" "80215" ...
## $ salary_currency : chr "USD" "USD" "USD" "USD" ...
## $ experience_level : chr "SE" "EN" "MI" "SE" ...
## $ employment_type : chr "CT" "CT" "FL" "FL" ...
## $ company_location : chr "China" "Canada" "Switzerland" "India" ...
## $ company_size : chr "m" "M" "L" "M" ...
## $ employee_residence : chr "China" "Ireland" "South Korea" "India" ...
## $ remote_ratio : chr "50" "100" "0" "50" ...
## $ required_skills : chr "Tableau, PyTorch, Kubernetes, Linux, NLP" "Deep Learning, AWS, Mathematics, Python, Docker" "Kubernetes, Deep Learning, Java, Hadoop, NLP" "Scala, SQL, Linux, Python" ...
## $ education_required : chr "bachelor" "Master" "associate" "PhD" ...
## $ years_experience : int 9 1 2 7 0 7 3 0 7 5 ...
## $ industry : chr "Automotive" "Media" "Education" "Consulting" ...
## $ posting_date : chr "10/18/2024" "11/20/2024" "3/18/2025" "23-Dec-24" ...
## $ application_deadline : chr "11/07/2024" "01/11/2025" "04/07/2025" "2/24/2025" ...
## $ job_description_length: int 1076 1268 1974 1345 1989 819 1936 1286 551 2340 ...
## $ benefits_score : num 99.9 5.2 9.4 8.6 6.6 5.9 6.3 7.6 9.3 5.8 ...
## $ company_name : chr "Smart Analytics" "TechCorp Inc" "Autonomous Tech" "Future Systems" ...
# Count the missing (NA) values in each column of the freshly loaded data.
colSums(is.na(data))
## job_id job_title salary_usd
## 0 0 2143
## salary_currency experience_level employment_type
## 0 0 0
## company_location company_size employee_residence
## 0 0 0
## remote_ratio required_skills education_required
## 0 0 0
## years_experience industry posting_date
## 0 0 0
## application_deadline job_description_length benefits_score
## 0 0 0
## company_name
## 0
# Column clean-up
# Drop the unused ID/index column "X".
# NOTE(review): the str() listing of `data` shows no column named X, so this
# looks like a no-op for this file -- confirm whether it is still needed.
data[["X"]] <- NULL
# Re-check the number of missing values per column after the clean-up.
colSums(is.na(data))
## job_id job_title salary_usd
## 0 0 2143
## salary_currency experience_level employment_type
## 0 0 0
## company_location company_size employee_residence
## 0 0 0
## remote_ratio required_skills education_required
## 0 0 0
## years_experience industry posting_date
## 0 0 0
## application_deadline job_description_length benefits_score
## 0 0 0
## company_name
## 0
## [1] "AI Research Scientist" "AI Software Engineer"
## [3] "AI Specialist" "NLP Engineer"
## [5] "AI Consultant" "AI Architect"
## [7] "Principal Data Scientist" "Data Analyst"
## [9] "Autonomous Systems Engineer" "AI Product Manager"
## [11] "Machine Learning Engineer" "Data Engineer"
## [13] "Research Scientist" "ML Ops Engineer"
## [15] "Robotics Engineer" "Head of AI"
## [17] "Deep Learning Engineer" "Data Scientist"
## [19] "Machine Learning Researcher" "Computer Vision Engineer"
## [21] "computer vision engineer"
## [1] "AI Research Scientist" "AI Software Engineer"
## [3] "AI Specialist" "NLP Engineer"
## [5] "AI Consultant" "AI Architect"
## [7] "Principal Data Scientist" "Data Analyst"
## [9] "Autonomous Systems Engineer" "AI Product Manager"
## [11] "Machine Learning Engineer" "Data Engineer"
## [13] "Research Scientist" "ML Ops Engineer"
## [15] "Robotics Engineer" "Head Of AI"
## [17] "Deep Learning Engineer" "Data Scientist"
## [19] "Machine Learning Researcher" "Computer Vision Engineer"
## [1] "character"
## [1] "USD" "EUR" "GBP"
## [1] "m" "M" "L" "S" "s" "l"
## [1] "50" "100" "0" "0%" "fifty%"
## [6] "100%" "50%" "fifty" "100 percent"
## [1] "50" "100" "0" "fifty" "100 percent"
## [1] 50 100 0
## [1] "bachelor" "Master" "associate" "PhD" "MASTER" "ASSOCIATE"
## [7] "phd" "PHD" "Bachelor" "Associate" "master" "BACHELOR"
## [1] "Bachelor" "Master" "Associate" "PhD"
## [1] FALSE
## [1] "10/18/2024" "11/20/2024" "3/18/2025" "23-Dec-24" "15-Apr-25"
## [6] "8/31/2024" "12/29/2024" "06/07/2024" "04-Nov-24" "20-Oct-24"
## [11] "1/29/2025" "16-Jul-24" "29-Dec-24" "02/11/2024" "02-Oct-24"
## [16] "9/28/2024" "02/06/2025" "24-Apr-24" "4/30/2024" "01-May-24"
## [21] "6/21/2024" "23-Apr-24" "13-Mar-25" "27-Oct-24" "08/01/2024"
## [26] "18-Nov-24" "01/10/2025" "10/08/2024" "1/18/2024" "16-May-24"
## [31] "1/31/2024" "8/28/2024" "03/01/2024" "10/26/2024" "31-Jan-24"
## [36] "10/17/2024" "1/14/2024" "15-Jun-24" "2/24/2024" "3/17/2025"
## [41] "02/01/2025" "4/24/2024" "03/03/2025" "10-Jan-24" "11/16/2024"
## [46] "09/05/2024" "08/08/2024" "03/10/2025" "8/16/2024" "03/12/2024"
## [51] "07-Jul-24" "08/05/2024" "7/29/2024" "28-Sep-24" "3/22/2025"
## [56] "02/09/2024" "3/20/2024" "6/22/2024" "05-Jun-24" "3/23/2024"
## [61] "20-Feb-24" "12/15/2024" "15-Dec-24" "01-Sep-24" "02/09/2025"
## [66] "30-Aug-24" "31-Oct-24" "14-Feb-24" "31-May-24" "08/02/2024"
## [71] "11/29/2024" "04/08/2025" "19-Sep-24" "06/10/2024" "20-Apr-24"
## [76] "13-Feb-25" "31-Aug-24" "04/01/2024" "02/05/2024" "6/20/2024"
## [81] "6/19/2024" "14-Jul-24" "16-Apr-25" "15-Feb-25" "20-May-24"
## [86] "11/19/2024" "26-Apr-25" "10-Jul-24" "04/10/2024" "04/05/2025"
## [91] "12/04/2024" "01/12/2025" "4/20/2024" "01/02/2025" "25-Jan-24"
## [96] "27-Jan-24" "21-Jul-24" "03-Mar-25" "9/15/2024" "1/28/2024"
## [101] "12/13/2024" "09/04/2024" "08-Mar-24" "5/13/2024" "3/18/2024"
## [106] "13-Mar-24" "14-Aug-24" "06/04/2024" "08/11/2024" "10/22/2024"
## [111] "15-Apr-24" "5/18/2024" "01/06/2025" "26-Mar-25" "19-Apr-25"
## [116] "12/25/2024" "04-Dec-24" "24-Aug-24" "18-Mar-25" "03-Aug-24"
## [121] "03/02/2025" "3/28/2024" "02/03/2025" "16-Jan-25" "08/09/2024"
## [126] "11/06/2024" "08-Aug-24" "05-Jan-25" "24-May-24" "12-Apr-25"
## [131] "9/23/2024" "03/09/2024" "11/17/2024" "22-May-24" "12/02/2024"
## [136] "3/30/2024" "04-Jul-24" "25-Oct-24" "4/21/2024" "23-May-24"
## [141] "01/12/2024" "19-Mar-24" "03/04/2025" "12-Dec-24" "3/14/2024"
## [146] "6/25/2024" "05-Oct-24" "1/30/2025" "11-Aug-24" "7/19/2024"
## [151] "06-Jan-24" "25-Aug-24" "20-Nov-24" "3/15/2025" "3/13/2025"
## [156] "01/08/2025" "09-Nov-24" "01-Nov-24" "11-Jul-24" "10-Mar-24"
## [161] "9/20/2024" "1/28/2025" "10/07/2024" "06/01/2024" "07/07/2024"
## [166] "02/06/2024" "30-Sep-24" "5/23/2024" "20-Dec-24" "10-Jun-24"
## [171] "10/15/2024" "02/01/2024" "7/14/2024" "07-Aug-24" "17-Jul-24"
## [176] "17-Feb-25" "2/15/2025" "2/14/2024" "07-Feb-25" "25-Feb-24"
## [181] "23-Aug-24" "29-Mar-25" "20-Mar-25" "6/14/2024" "22-Nov-24"
## [186] "11/26/2024" "08-Oct-24" "3/26/2025" "29-Mar-24" "14-Sep-24"
## [191] "19-Mar-25" "1/30/2024" "4/15/2025" "7/15/2024" "09/08/2024"
## [196] "4/15/2024" "1/25/2025" "07-Feb-24" "4/20/2025" "02/08/2025"
## [201] "1/27/2024" "09-Mar-24" "4/18/2024" "1/15/2025" "30-Mar-25"
## [206] "10/11/2024" "04-Feb-25" "22-Mar-24" "07/03/2024" "4/27/2025"
## [211] "16-Dec-24" "07/01/2024" "19-Feb-24" "9/19/2024" "02/11/2025"
## [216] "03/06/2025" "12/09/2024" "02-Feb-25" "12-Apr-24" "11/09/2024"
## [221] "25-Dec-24" "9/13/2024" "10-Oct-24" "28-Jan-25" "04-Mar-25"
## [226] "1/20/2024" "05-May-24" "12/14/2024" "05/06/2024" "21-Jan-24"
## [231] "8/20/2024" "07-Dec-24" "15-Mar-25" "21-Dec-24" "03/11/2024"
## [236] "1/27/2025" "06-Jun-24" "12/17/2024" "03-Apr-25" "21-Mar-25"
## [241] "26-Jun-24" "4/18/2025" "2/17/2025" "03/08/2025" "27-Jun-24"
## [246] "10/25/2024" "28-Apr-24" "4/27/2024" "03-Feb-25" "8/22/2024"
## [251] "08-Feb-25" "2/16/2024" "10/09/2024" "05-Feb-24" "5/17/2024"
## [256] "1/19/2025" "2/23/2024" "30-Apr-25" "2/22/2024" "05/05/2024"
## [261] "27-Apr-24" "4/23/2025" "3/14/2025" "3/24/2024" "04/11/2025"
## [266] "20-Jun-24" "2/18/2025" "24-Feb-24" "1/22/2024" "3/26/2024"
## [271] "04/07/2025" "15-Jan-25" "04-Apr-25" "06-Mar-25" "8/24/2024"
## [276] "05/02/2024" "2/26/2025" "27-Jul-24" "10/19/2024" "8/19/2024"
## [281] "04-Jan-24" "09-Aug-24" "18-Jan-25" "7/31/2024" "06-Apr-25"
## [286] "12-Aug-24" "06-Oct-24" "12/16/2024" "13-Aug-24" "6/23/2024"
## [291] "03-Jul-24" "3/21/2025" "06-May-24" "22-Jun-24" "12-Jan-25"
## [296] "08-Jan-24" "28-Jul-24" "15-Jan-24" "4/14/2024" "1/29/2024"
## [301] "02-Jun-24" "01-Apr-25" "3/31/2024" "06-Jul-24" "10/05/2024"
## [306] "08/07/2024" "8/13/2024" "04/09/2025" "5/16/2024" "04/04/2024"
## [311] "12/26/2024" "8/21/2024" "17-May-24" "1/13/2024" "03/07/2025"
## [316] "08/12/2024" "5/20/2024" "8/14/2024" "06-Jan-25" "5/21/2024"
## [321] "14-Jun-24" "1/21/2024" "5/15/2024" "30-Mar-24" "4/25/2025"
## [326] "01/08/2024" "8/15/2024" "05/01/2024" "6/17/2024" "07/06/2024"
## [331] "05/07/2024" "22-Apr-24" "21-Apr-25" "13-Feb-24" "8/23/2024"
## [336] "10/31/2024" "10/12/2024" "03/09/2025" "1/26/2025" "18-Feb-25"
## [341] "05/11/2024" "1/31/2025" "04/10/2025" "10/04/2024" "6/15/2024"
## [346] "02-Nov-24" "09/02/2024" "11/27/2024" "3/27/2025" "11/13/2024"
## [351] "06/02/2024" "01/09/2024" "12-Jun-24" "14-Apr-24" "12-Oct-24"
## [356] "18-Feb-24" "11/11/2024" "02-Aug-24" "12/10/2024" "06-Aug-24"
## [361] "08-Apr-24" "5/14/2024" "22-Dec-24" "12/12/2024" "7/25/2024"
## [366] "04/12/2025" "01/11/2024" "3/31/2025" "07/05/2024" "2/28/2024"
## [371] "11/07/2024" "09-Sep-24" "3/19/2025" "17-Sep-24" "14-Dec-24"
## [376] "05/09/2024" "11/24/2024" "17-Apr-25" "09/12/2024" "14-Apr-25"
## [381] "01/04/2025" "07-May-24" "06/08/2024" "3/20/2025" "2/29/2024"
## [386] "27-Feb-25" "01/03/2024" "6/27/2024" "4/29/2025" "28-Feb-25"
## [391] "5/26/2024" "17-Oct-24" "01-Feb-25" "10-Aug-24" "7/26/2024"
## [396] "13-Jul-24" "02/12/2025" "6/16/2024" "13-Jan-25" "10/16/2024"
## [401] "25-Apr-25" "08-Feb-24" "17-Jan-24" "04/08/2024" "4/25/2024"
## [406] "14-Nov-24" "7/30/2024" "10/21/2024" "03/08/2024" "18-Jan-24"
## [411] "04-May-24" "12/05/2024" "9/16/2024" "08-Jul-24" "06/11/2024"
## [416] "13-Jan-24" "31-Dec-24" "09/01/2024" "02/02/2024" "1/17/2024"
## [421] "13-Dec-24" "04-Mar-24" "05/03/2024" "12-May-24" "03/05/2024"
## [426] "02/10/2024" "01-Mar-24" "19-Feb-25" "07/08/2024" "24-Nov-24"
## [431] "23-Sep-24" "30-May-24" "10/10/2024" "19-Jan-25" "23-Nov-24"
## [436] "26-Mar-24" "03-Mar-24" "2/19/2025" "27-Nov-24" "07-Sep-24"
## [441] "06/12/2024" "8/17/2024" "4/24/2025" "5/25/2024" "10/23/2024"
## [446] "19-Aug-24" "14-Oct-24" "15-Oct-24" "10-Apr-24" "2/24/2025"
## [451] "20-Mar-24" "21-Mar-24" "12/28/2024" "29-Feb-24" "20-Sep-24"
## [456] "07-Apr-24" "09/07/2024" "07-Mar-25" "7/18/2024" "29-Apr-24"
## [461] "11/02/2024" "09-Apr-25" "11/18/2024" "4/21/2025" "26-May-24"
## [466] "24-Dec-24" "04/06/2024" "08/06/2024" "11/21/2024" "01/05/2024"
## [471] "11/15/2024" "2/23/2025" "6/29/2024" "09-Apr-24" "12/07/2024"
## [476] "25-Jun-24" "9/18/2024" "12/08/2024" "10/27/2024" "09/10/2024"
## [481] "04-Apr-24" "06/09/2024" "02-Apr-25" "03/10/2024" "07-Jun-24"
## [486] "19-Jul-24" "06-Mar-24" "06/03/2024" "02-May-24" "6/13/2024"
## [491] "28-Mar-25" "11/25/2024" "07/12/2024" "04/12/2024" "09/09/2024"
## [496] "2/13/2025" "8/26/2024" "4/17/2025" "20-Aug-24" "2/14/2025"
## [501] "10-Sep-24" "12/22/2024" "18-Dec-24" "3/28/2025" "04/07/2024"
## [506] "12-Feb-24" "30-Jan-24" "04/01/2025" "7/20/2024" "19-Dec-24"
## [511] "24-Jan-25" "10/29/2024" "03/11/2025" "11/28/2024" "4/19/2024"
## [516] "03/05/2025" "09-Oct-24" "04-Jan-25" "06-Apr-24" "2/28/2025"
## [521] "15-Sep-24" "01/01/2025" "17-Feb-24" "10-Feb-25" "26-Feb-24"
## [526] "08/04/2024" "01/07/2024" "26-Feb-25" "1/18/2025" "9/27/2024"
## [531] "23-Feb-25" "2/25/2025" "1/22/2025" "05-Jan-24" "11-Mar-24"
## [536] "7/23/2024" "09-Mar-25" "12/21/2024" "01/03/2025" "22-Jan-24"
## [541] "3/29/2025" "1/25/2024" "06/05/2024" "05-Dec-24" "30-Jun-24"
## [546] "3/27/2024" "07-Jan-25" "9/24/2024" "08-Apr-25" "02/12/2024"
## [551] "4/22/2025" "1/16/2025" "3/22/2024" "06-Sep-24" "19-Apr-24"
## [556] "11/10/2024" "03-Jan-24" "24-Mar-25" "1/17/2025" "11/03/2024"
## [561] "06-Nov-24" "28-Apr-25" "27-Apr-25" "07/04/2024" "27-Feb-24"
## [566] "3/23/2025" "04/05/2024" "1/14/2025" "7/17/2024" "16-Jun-24"
## [571] "08-Sep-24" "01-Jul-24" "07-Apr-25" "18-Sep-24" "05-Aug-24"
## [576] "05/08/2024" "03/01/2025" "03-Jun-24" "21-Feb-24" "06-Feb-24"
## [581] "25-Nov-24" "24-Jun-24" "08/10/2024" "29-Oct-24" "12/20/2024"
## [586] "08-Dec-24" "30-Jan-25" "16-Mar-25" "2/25/2024" "9/14/2024"
## [591] "02-Mar-25" "4/13/2025" "4/23/2024" "11/08/2024" "03/06/2024"
## [596] "11-Mar-25" "21-Sep-24" "14-May-24" "29-Apr-25" "11/30/2024"
## [601] "21-Feb-25" "07/02/2024" "1/19/2024" "12/01/2024" "01-Mar-25"
## [606] "07/11/2024" "12/11/2024" "1/23/2025" "02/05/2025" "21-Jan-25"
## [611] "19-Jan-24" "25-May-24" "29-Jun-24" "10/06/2024" "24-Jan-24"
## [616] "15-May-24" "17-Dec-24" "2/21/2025" "17-Jun-24" "20-Feb-25"
## [621] "27-Sep-24" "5/28/2024" "15-Feb-24" "09/03/2024" "11-Nov-24"
## [626] "10-Nov-24" "02/08/2024" "04-Jun-24" "10/30/2024" "22-Apr-25"
## [631] "10/20/2024" "4/16/2024" "3/16/2024" "28-Jun-24" "11-Apr-25"
## [636] "6/18/2024" "16-Sep-24" "24-Apr-25" "09-Feb-25" "5/29/2024"
## [641] "04-Feb-24" "4/14/2025" "4/28/2024" "04-Sep-24" "28-Jan-24"
## [646] "03/04/2024" "28-Nov-24" "31-Jul-24" "01-Aug-24" "8/27/2024"
## [651] "10/24/2024" "9/21/2024" "18-Apr-24" "04/11/2024" "26-Dec-24"
## [656] "1/13/2025" "24-Mar-24" "04/04/2025" "22-Jul-24" "22-Sep-24"
## [661] "29-Sep-24" "01/07/2025" "02/07/2024" "28-May-24" "12/24/2024"
## [666] "04/09/2024" "11-Jan-25" "02/03/2024" "11/23/2024" "12/31/2024"
## [671] "10/01/2024" "29-Aug-24" "07-Mar-24" "3/13/2024" "4/26/2024"
## [676] "8/30/2024" "2/20/2024" "7/16/2024" "04/02/2024" "07-Nov-24"
## [681] "09-Jan-25" "28-Aug-24" "12-Sep-24" "4/16/2025" "19-May-24"
## [686] "06-Feb-25" "10/13/2024" "6/30/2024" "01-Dec-24" "03/02/2024"
## [691] "11-Jun-24" "27-Aug-24" "01/06/2024" "21-Oct-24" "03-Dec-24"
## [696] "23-Apr-25" "04/03/2024" "04/02/2025" "3/16/2025" "1/21/2025"
## [701] "16-Apr-24" "1/24/2024" "4/22/2024" "10/14/2024" "2/17/2024"
## [706] "09-Jun-24" "11-Feb-24" "03/07/2024" "12/03/2024" "17-Aug-24"
## [711] "12/27/2024" "05/10/2024" "03-Sep-24" "22-Jan-25" "2/27/2025"
## [716] "2/22/2025" "17-Nov-24" "01/04/2024" "9/29/2024" "22-Oct-24"
## [721] "7/21/2024" "22-Feb-24" "11/22/2024" "29-Nov-24" "01/09/2025"
## [726] "5/19/2024" "27-Dec-24" "11-May-24" "30-Jul-24" "15-Nov-24"
## [731] "08-Mar-25" "3/17/2024" "22-Feb-25" "05-Jul-24" "21-Apr-24"
## [736] "04/03/2025" "8/29/2024" "05/04/2024" "1/26/2024" "18-Aug-24"
## [741] "21-Aug-24" "2/13/2024" "23-Mar-25" "23-Mar-24" "13-Nov-24"
## [746] "17-Mar-25" "4/28/2025" "7/22/2024" "05/12/2024" "2/19/2024"
## [751] "28-Feb-24" "5/22/2024" "24-Feb-25" "4/17/2024" "18-May-24"
## [756] "26-Aug-24" "27-May-24" "04-Oct-24" "9/26/2024" "4/13/2024"
## [761] "25-Mar-25" "14-Jan-24" "12/30/2024" "01-Apr-24" "30-Dec-24"
## [766] "4/26/2025" "03-Jan-25" "20-Jan-24" "23-Oct-24" "02/07/2025"
## [771] "4/30/2025" "02/04/2025" "26-Oct-24" "26-Jul-24" "11/14/2024"
## [776] "7/28/2024" "07/10/2024" "6/24/2024" "5/30/2024" "02-Dec-24"
## [781] "15-Mar-24" "05-Mar-25" "13-Oct-24" "01-Jan-24" "21-Jun-24"
## [786] "6/28/2024" "11-Dec-24" "24-Jul-24" "26-Nov-24" "18-Jun-24"
## [791] "2/26/2024" "08-Jan-25" "08/03/2024" "08-Nov-24" "3/19/2024"
## [796] "16-Oct-24" "9/22/2024" "06/06/2024" "9/17/2024" "3/21/2024"
## [801] "31-Jan-25" "05-Apr-25" "01-Oct-24" "2/21/2024" "10/03/2024"
## [806] "2/27/2024" "11/04/2024" "01/10/2024" "26-Apr-24" "01-Jan-25"
## [811] "02-Mar-24" "1/15/2024" "20-Jul-24" "6/26/2024" "30-Oct-24"
## [816] "02/02/2025" "19-Jun-24" "20-Jan-25" "5/24/2024" "01/02/2024"
## [821] "3/25/2025" "12-Mar-25" "10-Apr-25" "7/24/2024" "03-May-24"
## [826] "05-Apr-24" "12-Mar-24" "09-Feb-24" "23-Jul-24" "02-Jan-25"
## [831] "19-Nov-24" "05-Nov-24" "03-Feb-24" "26-Jan-24" "3/15/2024"
## [836] "27-Mar-25" "24-Oct-24" "02/10/2025" "08-May-24" "12-Jan-24"
## [841] "28-Oct-24" "2/16/2025" "3/30/2025" "11-Sep-24" "26-Sep-24"
## [846] "03-Nov-24" "1/16/2024" "31-Mar-24" "3/29/2024" "03/03/2024"
## [851] "12-Feb-25" "18-Mar-24" "22-Aug-24" "23-Jan-25" "09-Dec-24"
## [856] "15-Aug-24" "02/04/2024" "11/05/2024" "23-Jun-24" "17-Apr-24"
## [861] "01/01/2024" "09/11/2024" "10-Dec-24" "27-Mar-24" "12/19/2024"
## [866] "18-Apr-25" "28-Dec-24" "03-Apr-24" "09/06/2024" "25-Apr-24"
## [871] "13-Sep-24" "28-Mar-24" "01/11/2025" "14-Mar-24" "02-Jul-24"
## [876] "09-Jul-24" "16-Nov-24" "1/20/2025" "05-Mar-24" "25-Sep-24"
## [881] "07/09/2024" "25-Mar-24" "01-Jun-24" "11/01/2024" "1/24/2025"
## [886] "12-Nov-24" "10-May-24" "03-Oct-24" "31-Mar-25" "1/23/2024"
## [891] "9/30/2024" "13-Jun-24" "17-Mar-24" "10-Feb-24" "08-Jun-24"
## [896] "22-Mar-25" "25-Jul-24" "10-Mar-25" "27-Jan-25" "11/12/2024"
## [901] "16-Aug-24" "10/28/2024" "10/02/2024" "9/25/2024" "2/15/2024"
## [906] "01-Feb-24" "02-Apr-24" "12/23/2024" "25-Feb-25" "12-Jul-24"
## [911] "29-Jan-25" "13-May-24" "02-Feb-24" "7/27/2024" "2/20/2025"
## [916] "15-Jul-24" "05-Feb-25" "2/18/2024" "17-Jan-25" "19-Oct-24"
## [921] "05-Sep-24" "4/29/2024" "16-Jan-24" "14-Jan-25" "11-Jan-24"
## [926] "24-Sep-24" "29-Jan-24" "4/19/2025" "07-Oct-24" "14-Feb-25"
## [931] "11-Feb-25" "25-Jan-25" "14-Mar-25" "10-Jan-25" "26-Jan-25"
## [936] "13-Apr-25" "30-Apr-24" "5/27/2024" "29-Jul-24" "8/18/2024"
## [941] "09-May-24" "02-Sep-24" "3/25/2024" "29-May-24" "5/31/2024"
## [946] "01/05/2025" "23-Feb-24" "8/25/2024" "03/12/2025" "09-Jan-24"
## [951] "12/18/2024" "11-Oct-24" "12/06/2024" "3/24/2025" "7/13/2024"
## [956] "13-Apr-24" "11-Apr-24" "02-Jan-24" "06-Dec-24" "07-Jan-24"
## [961] "16-Feb-24" "30-Nov-24" "23-Jan-24" "18-Jul-24" "20-Apr-25"
## [966] "04/06/2025" "16-Feb-25" "04-Aug-24" "21-May-24" "16-Mar-24"
## [971] "21-Nov-24" "18-Oct-24"
## Date[1:15000], format: "2024-10-18" "2024-11-20" "2025-03-18" "2024-12-23" "2025-04-15" ...
## Date[1:15000], format: "2024-11-07" "2025-01-11" "2025-04-07" "2025-02-24" "2025-06-23" ...
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 6.30 7.60 12.12 9.00 99.90
## # A tibble: 6 × 3
## job_id job_title benefits_score
## <chr> <chr> <dbl>
## 1 AI00001 AI Research Scientist 99.9
## 2 AI00021 Data Engineer 99.9
## 3 AI00041 Data Scientist 99.9
## 4 AI00061 Data Scientist 99.9
## 5 AI00081 Data Analyst 99.9
## 6 AI00101 AI Product Manager 99.9
## [1] 750
## [1] 14250
## [1] 0
## [1] 750
## [1] 15000
## Rentang_Nilai Jumlah_Data Persentase
## 1 0 – 10 14250 95
## 2 10 – 99.9 0 0
## 3 99.9 750 5
## [1] 130
## [1] 14250
## [1] "TechCorp Inc" "Autonomous Tech"
## [3] "Future Systems" "Advanced Robotics"
## [5] "Neural Networks Co" "DataVision Ltd"
## [7] "Cloud AI Solutions" "Quantum Computing Inc"
## [9] "Smart Analytics" "Predictive Systems"
## [11] "AI Innovations" "Algorithmic Solutions"
## [13] "Cognitive Computing" "DeepTech Ventures"
## [15] "Machine Intelligence Group" "Digital Transformation LLC"
##
## TechCorp Inc Cognitive Computing
## 929 926
## AI Innovations Digital Transformation LLC
## 915 915
## Future Systems Quantum Computing Inc
## 913 907
## Cloud AI Solutions Predictive Systems
## 903 901
## Advanced Robotics Smart Analytics
## 887 881
## Autonomous Tech Machine Intelligence Group
## 873 871
## Neural Networks Co DeepTech Ventures
## 868 856
## DataVision Ltd Algorithmic Solutions
## 855 850
# --- 1. Top 10 Job Titles ---
# Count postings per job title and keep the 10 most frequent titles.
# with_ties = FALSE caps the result at 10 rows even when several titles share
# the count at the cut-off (slice_max() keeps all tied rows by default).
top10_jobs <- data %>%
  count(job_title, sort = TRUE) %>%
  slice_max(n, n = 10, with_ties = FALSE)

# Gradient from dark purple to light lilac with one colour per row of
# top10_jobs. The palette is sized from the data because geom_col(fill = ...)
# only accepts a vector of length 1 or of the same length as the data.
gradasi_ungu <- colorRampPalette(c("#4B2E59", "#E1BEE7"))(nrow(top10_jobs))

# Horizontal bar chart; titles are ordered on the y axis by their count.
# Rows of top10_jobs are sorted by descending count, so the darkest colour
# goes to the most frequent title.
p_top10 <- ggplot(top10_jobs, aes(x = n, y = fct_reorder(job_title, n))) +
  geom_col(fill = gradasi_ungu) +
  labs(title = "Top 10 Job Titles", x = "Jumlah Lowongan Kerja", y = NULL) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(face = "bold", size = 16, color = "#D6A8D9"),
    plot.background = element_rect(fill = NA, color = NA),
    panel.background = element_rect(fill = NA)
  )

# Save the result as a PNG with a transparent background. The target folder
# is created first because saving into a missing directory fails.
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave("output_plot/top10_job_title_gradient.png", plot = p_top10, width = 7, height = 5, bg = "transparent")

# Show the plot in the viewer.
print(p_top10)
# --- 2. Top 5 Company Locations (Pie Chart) ---
# Keep the 5 most frequent company locations and compute each location's share
# of the postings within that top-5 group, sorted from largest to smallest.
# (This table used to be built twice in a row; the first copy was overwritten
# before it was used, so only one computation is kept.)
top_lokasi <- data %>%
  count(company_location, sort = TRUE) %>%
  slice_max(n, n = 5) %>%
  mutate(prop = n / sum(n)) %>%
  arrange(desc(prop))

# Dark-to-light purple gradient, named by location so that the largest share
# gets the darkest colour regardless of the order used by the fill scale.
fill_colors <- colorRampPalette(c("#2C005B", "#E9D5FF"))(nrow(top_lokasi))
names(fill_colors) <- top_lokasi$company_location

# Pie chart with percentages printed on the slices (not in the legend).
# The labels are placed with position_stack(vjust = 0.5) so that they follow
# the same stacking order as the slices. A hand-computed cumulative position
# in descending-share order does not match the order in which ggplot stacks
# the fill groups, which puts percentages on the wrong slices.
p <- ggplot(top_lokasi, aes(x = "", y = prop, fill = company_location)) +
  geom_col(width = 1, color = "black", alpha = 0.85) +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = paste0(round(prop * 100, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "black", size = 4
  ) +
  scale_fill_manual(values = fill_colors) +
  labs(title = "Top 5 Company Locations for AI Jobs") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14, color = "black"),
    legend.position = "right",
    legend.text = element_text(size = 10, color = "grey2"),
    legend.title = element_blank(),
    legend.background = element_rect(fill = "transparent", color = NA),
    legend.key = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA)
  )

# Save the plot. The target folder is created first because saving into a
# missing directory fails.
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave("output_plot/pie_company_location_final_nopct_legend.png", plot = p,
       width = 8, height = 6, dpi = 300, bg = "transparent")

# Show the plot in the viewer.
print(p)
Distribusi lokasi perusahaan AI di lima negara sangat merata, dengan Austria memimpin tipis (20,7%) yang kemudian disusul empat negara lain. Ini menunjukkan bahwa peluang karier AI tersebar luas di berbagai negara, tidak hanya terpusat di satu lokasi dominan.
# --- 3. experience_level ---
# Descriptive labels for the experience-level codes, listed in the same
# seniority order as the plot caption (Entry, Mid, Senior, Expert).
experience_mapping <- c(
  "EN" = "EN (Entry)",
  "MI" = "MI (Mid)",
  "SE" = "SE (Senior)",
  "EX" = "EX (Expert)"
)

# Count postings per experience level and attach the descriptive label.
# The label is stored as a factor whose levels follow experience_mapping, so
# the bars are drawn in seniority order instead of alphabetically
# (EN, EX, MI, SE). unname() drops the names left over from vector indexing;
# codes that are not in the mapping become NA.
exp_plot_data <- data %>%
  count(experience_level) %>%
  mutate(
    experience_label = factor(
      unname(experience_mapping[as.character(experience_level)]),
      levels = unname(experience_mapping)
    )
  )

# Bar chart with the count printed above each bar.
p_experience <- ggplot(exp_plot_data, aes(x = experience_label, y = n, fill = experience_label)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = n), vjust = -0.5, color = "black", size = 5) +
  scale_fill_manual(values = c(
    "EN (Entry)" = "#E1BEE7",
    "MI (Mid)" = "#AB47BC",
    "SE (Senior)" = "#CE93D8",
    "EX (Expert)" = "#6A1B9A"
  )) +
  # Add 10% headroom at the top so the count labels are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Distribusi Tingkat Pengalaman",
    x = "Experience Level",
    y = "Jumlah Lowongan Pekerjaan",
    caption = "Keterangan: EN = Entry | MI = Mid | SE = Senior | EX = Expert"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(color = "black", face = "bold", size = 16, hjust = 0.5),
    axis.text = element_text(color = "grey20", size = 12),
    axis.title = element_text(color = "grey20", size = 13),
    plot.caption = element_text(color = "grey40", size = 10, hjust = 0),
    legend.position = "none",
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.title.position = "plot",
    plot.margin = margin(t = 60, r = 30, b = 30, l = 30)
  )

# Save as a PNG with a transparent background.
ggsave("plot_experience.png",
       plot = p_experience,
       width = 10, height = 7, dpi = 300, bg = "transparent")

# Show the plot.
print(p_experience)
Lowongan AI tersedia secara merata untuk semua level pengalaman, dari Entry hingga Expert. Artinya, baik pemula maupun profesional berpengalaman memiliki peluang besar untuk masuk ke industri ini.
# --- 4. education_required ---
# Postings per required education level, most frequent first. The education
# column becomes a factor ordered by descending count (drives the x-axis
# order) and every row gets its own colour from a dark-to-light purple ramp.
edu_dist <- data %>%
  count(education_required) %>%
  mutate(education_required = reorder(education_required, -n)) %>%
  arrange(desc(n)) %>%
  mutate(
    fill_color = colorRampPalette(c("#4B0082", "#D8BFD8"))(n())
  )

# Bar chart; scale_fill_identity() uses the hex codes stored in fill_color
# directly as the bar colours.
gg_edu <- ggplot(
  edu_dist,
  aes(x = education_required, y = n, fill = fill_color)
) +
  geom_col() +
  geom_text(
    aes(label = n),
    vjust = -0.3,
    size = 4,
    color = "grey"
  ) +
  scale_fill_identity() +
  labs(
    title = "Distribusi Pendidikan yang Dibutuhkan",
    x = "Tingkat Pendidikan",
    y = "Jumlah Lowongan Pekerjaan"
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    # Remove every grid line.
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    axis.text = element_text(color = "grey2"),
    axis.title = element_text(color = "grey2"),
    plot.title = element_text(face = "bold", size = 14, color = "black"),
    legend.position = "none"
  )

# Write the chart to disk with a transparent background.
ggsave(
  "output_plot/grafik_education_required_cleaned.png",
  gg_edu,
  bg = "transparent",
  width = 8,
  height = 6
)

# Show the plot in the viewer.
print(gg_edu)
Lowongan pekerjaan di bidang AI tidak mensyaratkan gelar tinggi secara mutlak. Bahkan lulusan Associate dan Bachelor memiliki jumlah lowongan yang setara atau lebih banyak dibanding Master dan PhD. Artinya, kesempatan masuk ke dunia AI terbuka luas bagi semua jenjang pendidikan.
# --- 5. Company Size Distribution ---
# 1. Convert company_size to a factor ordered S, M, L.
#    The codes are trimmed and upper-cased first because the raw column
#    contains lowercase variants ("m", "s", "l"). The NA filter runs AFTER the
#    factor conversion: factor() turns every value outside S/M/L into NA, so
#    filtering beforehand would let those rows through and plot an NA bar.
data_company <- data %>%
  mutate(
    company_size = factor(
      toupper(trimws(company_size)),
      levels = c("S", "M", "L")
    )
  ) %>%
  filter(!is.na(company_size))

# 2. Count the postings in each size category.
company_size_plot <- data_company %>%
  count(company_size)

# 3. Bar chart with the count printed above each bar.
p_company_size <- ggplot(company_size_plot, aes(x = company_size, y = n, fill = company_size)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = n), vjust = -0.5, color = "grey2", size = 5) +
  scale_fill_manual(values = c("S" = "#CE93D8", "M" = "#9C27B0", "L" = "#E1BEE7")) +
  labs(
    title = "Distribusi Ukuran Perusahaan",
    x = "Ukuran (S, M, L)",
    y = "Jumlah"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(color = "black", face = "bold", size = 16),
    axis.text = element_text(color = "grey2", size = 12),
    axis.title = element_text(color = "black"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    legend.position = "none",
    plot.margin = margin(t = 40, r = 10, b = 10, l = 10)
  ) +
  # Allow the count labels to be drawn outside the panel area.
  coord_cartesian(clip = "off")

# 4. Save. The target folder is created first because saving into a missing
#    directory fails.
dir.create("output_plot", showWarnings = FALSE, recursive = TRUE)
ggsave(
  filename = "output_plot/distribusi_company_size.png",
  plot = p_company_size,
  width = 8, height = 6,
  dpi = 300,
  bg = "transparent"
)

# 5. Show the plot.
print(p_company_size)
Lowongan pekerjaan AI tersebar merata di semua ukuran perusahaan, baik kecil (S), menengah (M), maupun besar (L). Ini menunjukkan bahwa peluang karier di bidang AI tidak hanya terbatas pada perusahaan besar, tetapi juga terbuka luas di perusahaan kecil dan menengah.
# --- 6. Bar chart: Top 10 Jobs with the Highest Salary ---
# 1. Mean salary per job title; keep the 10 highest.
#    NOTE(review): assumes salary_usd is numeric at this point; the initial
#    str() listing shows it as character -- confirm it is converted upstream.
top10_jobs <- data %>%
  group_by(job_title) %>%
  summarise(mean_salary = mean(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(mean_salary)) %>%
  slice_head(n = 10)

# 2. Attach the dominant (most frequent) industry of each of those titles.
#    with_ties = FALSE guarantees exactly one industry per title. With ties
#    kept, a title would appear on several rows and geom_col() would stack
#    them, drawing a bar several times the real mean salary.
top10_with_industry <- data %>%
  filter(job_title %in% top10_jobs$job_title) %>%
  group_by(job_title, industry) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(job_title) %>%
  slice_max(n, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(job_title, industry) %>%
  left_join(top10_jobs, by = "job_title")

# 3. Purple gradient with one colour per distinct dominant industry.
purple_gradient <- colorRampPalette(c("#4B0082", "#D8BFD8"))(length(unique(top10_with_industry$industry)))

# 4. Horizontal bar chart with salary labels and extra room on the right.
p_top10_clean <- ggplot(top10_with_industry, aes(x = reorder(job_title, mean_salary),
                                                 y = mean_salary, fill = industry)) +
  geom_col(width = 0.7) +
  # clip = "off" lets the salary labels extend beyond the panel.
  coord_flip(clip = "off") +
  geom_text(aes(label = paste0("$", round(mean_salary))),
            hjust = -0.1, color = "grey", size = 4) +
  scale_fill_manual(values = purple_gradient) +
  labs(
    title = "Top 10 Pekerjaan dengan Gaji Tertinggi",
    subtitle = "Warna berdasarkan industri dominan",
    x = "Job Title", y = "Rata-rata Gaji (USD)", fill = "Industry"
  ) +
  # 20% extra space past the longest bar for the labels.
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    panel.grid = element_blank(),
    axis.text = element_text(color = "grey2"),
    axis.title = element_text(color = "grey"),
    plot.title = element_text(color = "black", face = "bold", size = 16),
    plot.subtitle = element_text(color = "purple", size = 12),
    legend.title = element_text(color = "grey2"),
    legend.text = element_text(color = "grey"),
    legend.position = "right"
  )

# 5. Save as a PNG with a transparent background.
ggsave("top10_job_salary_colored_by_industry.png",
       plot = p_top10_clean,
       width = 14, height = 7, dpi = 300, bg = "transparent")

# Show the plot.
print(p_top10_clean)
Profesi berbasis AI mendominasi posisi dengan gaji tertinggi, seperti AI Specialist, Machine Learning Engineer, dan Head of AI. Ini mencerminkan permintaan tinggi terhadap keahlian AI di berbagai industri. Meskipun lebih teknis, peran seperti AI Consultant dan Principal Data Scientist juga tetap masuk top 10, menandakan kombinasi teknis dan strategis sangat dihargai di pasar kerja saat ini.
# --- 7. Heatmap Job Title X Industry ---
# Compact heatmap: job title x industry, restricted to the top entries.
# Problem: with 15 industries the heatmap is too dense.
# Solution: keep only the 5 industries with the highest median salary.
library(dplyr)
library(ggplot2)

# The 5 industries with the highest median salary.
top_industri <- data %>%
  group_by(industry) %>%
  summarise(median_salary = median(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(median_salary)) %>%
  slice_head(n = 5) %>%
  pull(industry)

# The 10 job titles with the highest mean salary inside those industries.
top_jobs <- data %>%
  filter(industry %in% top_industri) %>%
  group_by(job_title) %>%
  summarise(avg_salary = mean(salary_usd, na.rm = TRUE)) %>%
  arrange(desc(avg_salary)) %>%
  slice_head(n = 10) %>%
  pull(job_title)

# Mean salary for every (industry, job title) pair among the selected ones,
# drawn as labelled tiles; the finished plot is stored in p_heatmap.
p_heatmap <- data %>%
  filter(
    industry %in% top_industri,
    job_title %in% top_jobs
  ) %>%
  group_by(industry, job_title) %>%
  summarise(avg_salary = mean(salary_usd, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(
    aes(
      x = industry,
      y = reorder(job_title, avg_salary),
      fill = avg_salary
    )
  ) +
  geom_tile(color = "white") +
  geom_text(
    aes(label = round(avg_salary)),
    size = 3,
    color = "white"
  ) +
  scale_fill_gradientn(
    colours = c("#f2e5ff", "#b983ff", "#6a00ff"),
    name = "Gaji Rata-rata (USD)"
  ) +
  labs(
    title = "Top 10 Gaji AI: Job Title vs Industri",
    x = "Industri",
    y = "Posisi Pekerjaan"
  ) +
  theme_minimal() +
  theme(
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA),
    legend.key = element_rect(fill = "transparent", color = NA),
    axis.text = element_text(color = "grey2"),
    axis.title = element_text(color = "purple"),
    plot.title = element_text(color = "purple3", face = "bold", size = 14, hjust = 0.5),
    legend.title = element_text(color = "grey2"),
    legend.text = element_text(color = "grey"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Write the heatmap to disk with a transparent background.
ggsave(
  "heatmap_top10_salary_by_industry1.png",
  plot = p_heatmap,
  width = 10,
  height = 6,
  dpi = 300,
  bg = "transparent"
)

# Show the plot.
print(p_heatmap)
Gaji tertinggi dicapai oleh ML Ops Engineer di industri Education (USD 138K). Industri seperti Software, Finance, dan Media punya rentang kompensasi paling kompetitif. Profesi yang sama bisa berbeda gaji tergantung industrinya.
# --- 8. Stacked Column Chart Jenis Gaya Kerja ---
# Label every posting with a work style derived from remote_ratio
# (0 = Onsite, 50 = Hybrid, 100 = Remote). Rows with any other value are
# removed from `data` itself, so all later steps see the reduced data set.
data <- data %>%
  mutate(remote_category = case_when(
    remote_ratio == 0 ~ "Onsite",
    remote_ratio == 50 ~ "Hybrid",
    remote_ratio == 100 ~ "Remote",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(remote_category))

# Number of postings for each job title / work style combination.
remote_dist_filtered <- data %>%
  group_by(job_title, remote_category) %>%
  summarise(Jumlah = n(), .groups = "drop")

# The ten job titles with the most postings overall.
top_10_jobtitle <- remote_dist_filtered %>%
  group_by(job_title) %>%
  summarise(total = sum(Jumlah)) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10) %>%
  pull(job_title)

# Keep only those ten job titles.
remote_dist_filtered <- remote_dist_filtered %>%
  filter(job_title %in% top_10_jobtitle)

# Stacked columns, one per job title. Columns are ordered by their total height
# (FUN = sum); reorder()'s default of mean() would mis-order any title that is
# missing one of the work-style categories.
p_gayakerja <- ggplot(
  remote_dist_filtered,
  aes(
    x = reorder(job_title, -Jumlah, FUN = sum),
    y = Jumlah,
    fill = remote_category
  )
) +
  geom_col() +
  geom_text(
    aes(label = Jumlah),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  ) +
  scale_fill_manual(
    values = c(
      "Remote" = "#8E44AD", # dark purple
      "Hybrid" = "#BB8FCE", # light purple
      "Onsite" = "#D2B4DE"  # very light purple
    )
  ) +
  labs(
    title = "Jenis Gaya Kerja Berdasarkan Pekerjaan",
    subtitle = "Top 10 Pekerjaan dengan Distribusi Remote, Hybrid, dan Onsite",
    x = "Job Title",
    y = "Jumlah Pekerjaan",
    fill = "Gaya Kerja"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent"),
    legend.box.background = element_rect(fill = "transparent"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", size = 16, color = "#6C3483"),
    plot.subtitle = element_text(size = 12, color = "#A569BD")
  )

# Save as a transparent PNG. The plot is passed explicitly instead of relying
# on ggsave()'s implicit last_plot() default, matching the other ggsave() calls.
ggsave(
  "visual-remote-distribusi.png",
  plot = p_gayakerja,
  width = 12, height = 6.5, dpi = 300, bg = "transparent"
)

# Show the plot.
print(p_gayakerja)
Sebagian besar pekerjaan di bidang AI memungkinkan kerja remote, terutama untuk posisi seperti Data Engineer, Machine Learning Engineer, dan Software Engineer, yang punya jumlah lowongan remote tertinggi. Ini menunjukkan bahwa fleksibilitas kerja tinggi di bidang teknologi dan AI.
# --- 9. Boxplot Gaji berdasarkan Gaya Kerja ---
# Rebuild the work-style label from remote_ratio, stored directly as a factor
# with the display order Onsite < Hybrid < Remote. Rows whose remote_ratio is
# not 0, 50 or 100 get NA and are dropped from `data`.
data <- data %>%
  mutate(
    remote_category = factor(
      case_when(
        remote_ratio == 0 ~ "Onsite",
        remote_ratio == 50 ~ "Hybrid",
        remote_ratio == 100 ~ "Remote",
        TRUE ~ NA_character_
      ),
      levels = c("Onsite", "Hybrid", "Remote")
    )
  ) %>%
  filter(!is.na(remote_category))

# Salary distribution per work style, one box per category.
boxplot_gaji_gayakerja <- ggplot(
  data,
  aes(x = remote_category, y = salary_usd, fill = remote_category)
) +
  geom_boxplot(
    color = "black",
    width = 0.6,
    outlier.size = 1,
    outlier.alpha = 0.15
  ) +
  scale_fill_manual(
    values = c(
      "Remote" = "#8E44AD",
      "Hybrid" = "#A569BD",
      "Onsite" = "#D2B4DE"
    )
  ) +
  # No padding below the data, 10% padding above.
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Perbandingan Gaji Berdasarkan Gaya Kerja",
    subtitle = "Remote ternyata tetap punya rentang gaji kompetitif",
    x = "Gaya Kerja",
    y = "Gaji (USD)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.margin = margin(t = 20, r = 10, b = 10, l = 10),
    plot.title = element_text(
      face = "bold", size = 16, color = "#6C3483", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 12, color = "#A569BD", hjust = 0.5),
    axis.title.x = element_text(size = 13),
    axis.title.y = element_text(size = 13),
    axis.text = element_text(size = 11)
  )

# Save the plot on a white background.
ggsave(
  "boxplot-salary-remote-fixed-axisfix.png",
  plot = boxplot_gaji_gayakerja,
  width = 8.5,
  height = 6,
  dpi = 300,
  bg = "white"
)

# Show the plot.
print(boxplot_gaji_gayakerja)
Gaya kerja tidak menentukan gaji secara signifikan di bidang AI. Bahkan, pekerjaan remote tetap menawarkan gaji kompetitif dengan potensi median yang sedikit lebih tinggi.
# --- 10. Wordcloud Skill Paling Sering Muncul secara General ---
# 1. General word cloud
# Split each comma-separated skill list into one row per skill, trim the
# surrounding whitespace, drop blank or missing tokens (same cleaning as the
# top-5 companies word cloud) and count how often each skill appears.
skill_words <- data %>%
  select(required_skills) %>%
  separate_rows(required_skills, sep = ",") %>%
  mutate(required_skills = str_trim(required_skills)) %>%
  filter(!is.na(required_skills), required_skills != "") %>%
  count(required_skills, sort = TRUE)

# Word cloud of skills across all postings, drawn on the active device.
set.seed(123)
wordcloud(
  words = skill_words$required_skills,
  freq = skill_words$n,
  min.freq = 2,
  scale = c(4, 0.5),
  colors = brewer.pal(8, "BuPu")
)

# Same cloud written to a PNG file. The seed is set again so the saved image
# is reproducible on its own instead of depending on the random numbers
# consumed by the on-screen draw above.
png("wordcloud_umum.png", width = 800, height = 600, bg = "white")
set.seed(123)
wordcloud(
  words = skill_words$required_skills,
  freq = skill_words$n,
  min.freq = 2,
  scale = c(4, 0.5),
  colors = brewer.pal(8, "BuPu")
)
dev.off()
## png
## 2
Python dan SQL adalah fondasi utama dalam dunia AI, keduanya paling sering diminta dan wajib dipelajari oleh siapa pun yang ingin masuk bidang ini. Di sisi lain, skill lanjutan seperti Kubernetes, TensorFlow, dan PyTorch juga makin dicari, menandakan bahwa penguasaan tools teknis dan framework machine learning/deployment makin krusial.
# --- 11. BarChart 10 Skill Paling Banyak Dicari ---
# Clean the skill column and expand it to one row per skill: split on a comma
# plus optional whitespace, trim what is left, and drop blank tokens (e.g. from
# a trailing comma) so they cannot be counted as a skill.
skill_df <- data %>%
  filter(!is.na(required_skills)) %>%
  separate_rows(required_skills, sep = ",\\s*") %>%
  mutate(required_skills = str_trim(required_skills)) %>%
  filter(required_skills != "")

# Frequency of each skill, keeping exactly the ten most frequent.
# with_ties = FALSE stops slice_max() from returning more than ten rows when
# several skills share the tenth-highest count.
top_skill <- skill_df %>%
  group_by(required_skills) %>%
  summarise(freq = n()) %>%
  slice_max(order_by = freq, n = 10, with_ties = FALSE)

# Order the factor by frequency so ggplot does not fall back to alphabetical
# order.
top_skill <- top_skill %>%
  mutate(required_skills = fct_reorder(required_skills, freq))

# Horizontal bar chart of the ten skills.
ggplot(top_skill, aes(x = required_skills, y = freq)) +
  geom_col(fill = "#A569BD") +
  labs(
    title = "📈 10 Skill Paling Banyak Diminta",
    subtitle = "Python dan SQL tetap raja. LLM naik daun. Skill visualisasi & tools tetap relevan.",
    x = "Skill",
    y = "Jumlah Kemunculan"
  ) +
  theme_minimal(base_size = 13) +
  coord_flip()
Python dan SQL adalah fondasi utama dalam dunia AI, keduanya paling sering diminta dan wajib dipelajari oleh siapa pun yang ingin masuk bidang ini. Di sisi lain, skill lanjutan seperti Kubernetes, TensorFlow, dan PyTorch juga makin dicari, menandakan bahwa penguasaan tools teknis dan framework machine learning/deployment makin krusial.
# --- 12. Wordcloud Skill by Top 5 Perusahaan Gaji Tertinggi ---
# Skill word cloud for the five companies with the highest average salary.
# NOTE(review): this previously grouped by company_size, which has only a
# handful of levels, so "top 5" kept practically every row. The comments,
# variable names and output file name all refer to companies, so the grouping
# now uses company_name — confirm this matches the intended analysis.

# 1. Average salary per company. Companies without any known salary are
#    dropped so they cannot end up in the ranking.
mean_salary_company <- data %>%
  group_by(company_name) %>%
  summarise(mean_salary = mean(salary_usd, na.rm = TRUE)) %>%
  filter(!is.na(mean_salary)) %>%
  arrange(desc(mean_salary))

# 2. The five companies with the highest average salary.
top5_companies <- mean_salary_company %>%
  slice_head(n = 5) %>%
  pull(company_name)

# 3. Postings that belong to those five companies.
top5_data <- data %>%
  filter(company_name %in% top5_companies)

# 4. Extract the individual skills and count their frequency.
top5_skill_words <- top5_data %>%
  select(required_skills) %>%
  separate_rows(required_skills, sep = ",") %>%
  mutate(required_skills = str_trim(required_skills)) %>%
  filter(required_skills != "", !is.na(required_skills)) %>%
  count(required_skills, sort = TRUE)

# 5. Draw the word cloud on the active device, then save it.
set.seed(123)
wordcloud(
  words = top5_skill_words$required_skills,
  freq = top5_skill_words$n,
  min.freq = 1,
  scale = c(4, 0.5),
  colors = brewer.pal(8, "PuOr")
)

# The seed is set again so the saved PNG is reproducible on its own instead of
# depending on the random numbers consumed by the on-screen draw above.
png("wordcloud_top5_companies.png", width = 800, height = 600, bg = "white")
set.seed(123)
wordcloud(
  words = top5_skill_words$required_skills,
  freq = top5_skill_words$n,
  min.freq = 1,
  scale = c(4, 0.5),
  colors = brewer.pal(8, "PuOr")
)
dev.off()
## png
## 2
Skill teknikal seperti Python, SQL, Kubernetes, TensorFlow, dan PyTorch mendominasi pekerjaan bergaji tertinggi di dunia AI.
# --- 13.Stacked Column Chart Pekerjaan by Education ---
# Number of postings per job title and required education level, restricted to
# the four education labels used in the chart.
data_filtered <- data %>%
  filter(education_required %in% c("Associate", "Bachelor", "Master", "PhD")) %>%
  group_by(job_title, education_required) %>%
  summarise(jumlah = n(), .groups = "drop")

# The five job titles with the most postings. with_ties = FALSE guarantees
# exactly five titles (the chart title says "Top 5") even when several titles
# share the fifth-highest total.
top_jobs <- data_filtered %>%
  group_by(job_title) %>%
  summarise(total = sum(jumlah)) %>%
  slice_max(order_by = total, n = 5, with_ties = FALSE) %>%
  pull(job_title)

data_top <- data_filtered %>%
  filter(job_title %in% top_jobs)

# Stacked columns per job title, ordered from the tallest stack to the
# shortest and coloured by education level.
barchart_edu <- ggplot(
  data_top,
  aes(
    x = fct_reorder(job_title, -jumlah, .fun = sum),
    y = jumlah,
    fill = education_required
  )
) +
  geom_col(width = 0.7) +
  scale_fill_manual(values = c(
    "Associate" = "#f4ecf7",
    "Bachelor" = "#d2b4de",
    "Master" = "#a569bd",
    "PhD" = "#6c3483"
  )) +
  labs(
    title = "Pekerjaan Berdasarkan Tingkat Pendidikan (Top 5)",
    subtitle = "Perbandingan lulusan Associate, S1, S2, dan S3",
    x = NULL,
    y = "Jumlah Pekerja",
    fill = "Tingkat Pendidikan"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(
      face = "bold", size = 20, color = "purple4", hjust = 0.5
    ),
    plot.subtitle = element_text(
      size = 14, margin = margin(b = 15), color = "grey30", hjust = 0.5
    ),
    axis.text.x = element_text(
      size = 13, angle = 15, hjust = 1, color = "grey30"
    ),
    axis.text.y = element_text(size = 13, color = "grey30"),
    axis.title = element_text(color = "grey30"),
    legend.title = element_text(color = "#d2b4de"),
    legend.text = element_text(color = "grey2"),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "transparent", color = NA),
    plot.background = element_rect(fill = "transparent", color = NA),
    plot.margin = margin(t = 40, r = 30, b = 40, l = 30)
  ) +
  # Let labels extend into the plot margin without being clipped.
  coord_cartesian(clip = "off")

# Save as PNG with a transparent background.
ggsave(
  filename = "stacked_barchart_jobtitle_top5_clean.png",
  plot = barchart_edu,
  width = 10,
  height = 7,
  dpi = 300,
  bg = "transparent"
)

# Show the plot.
print(barchart_edu)
Lulusan S1 punya banyak peluang di dunia AI, terutama untuk posisi seperti AI Software Engineer dan Machine Learning Engineer. Sementara itu, lulusan S2 dan S3 (Master & PhD) lebih banyak ditemukan pada role yang fokus pada riset dan arsitektur sistem seperti AI Researcher atau AI Architect.