Tujuan dari tulisan ini adalah memberikan panduan dasar yang dapat Anda gunakan untuk memprediksi karyawan yang memiliki kinerja atau performa tinggi (Talent) dan siap menjadi Future Leader, serta untuk mengetahui aspek-aspek atau variabel-variabel apa saja yang paling berpengaruh.
Memprediksi talent dengan HR Analytics
1: Bagaimana mengidentifikasi berbagai variabel karyawan menggunakan teknik dasar statistik deskriptif.
Bagaimana membangun model prediktif menggunakan beberapa model prediktif sederhana.
Bagaimana mengukur akurasi model.
Asumsinya kita ingin mengetahui atau mampu memprediksi apakah seorang karyawan menjadi Talent atau bukan (0=bukan Talent, 1=Talent).
rm(list = ls())
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caTools) # splitting sample
library(ROCR) # pengukuran ROC/ AUC
library(pROC) # pengukuran ROC/ AUC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(rattle) # visualisasi model prediktif
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
## Loading required package: rpart
library(RColorBrewer)
library(psych) # untuk korelasi biserial
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
setwd("D:/R/hranalytics/Rmd")
# Load the semicolon-separated talent data set. stringsAsFactors is set
# explicitly because the default changed to FALSE in R 4.0.0; the str(d)
# output recorded in this document shows the text columns as factors.
d <- read.csv("datatalent.csv", header = TRUE, sep = ";", stringsAsFactors = TRUE)
dim(d) # jumlah data
## [1] 350 40
colnames(d) # nama-nama variabel
## [1] "No" "Batch"
## [3] "Wilayah" "Gender"
## [5] "Status_Sipil" "Usia"
## [7] "Pendidikan" "Jurusan"
## [9] "Grade" "Pengalaman_Kerja"
## [11] "Pengalaman_di_A_P_K" "Pengalaman_Sales"
## [13] "Pengalaman_Organisasi" "Pengurus_Organisasi"
## [15] "nilai_training" "DISC1"
## [17] "DISC2" "DISC3"
## [19] "accuracy" "verbal_reasoning"
## [21] "working_memory" "numerical_reasoning"
## [23] "spatial_reasoning" "Fluid_intelligence"
## [25] "arithmetics" "component"
## [27] "word_relation" "figure_classification"
## [29] "crystallized_intelligence" "log_berpikir"
## [31] "k_numerikal" "d_analisa"
## [33] "k_verbal" "o_hasil"
## [35] "fleks" "s_kerja"
## [37] "m_prestasi" "kerjasama"
## [39] "k_interpersonal" "talent"
summary(d)
## No Batch Wilayah Gender
## Min. : 1.00 Min. :71.00 palembang: 52 LAKI-LAKI:103
## 1st Qu.: 88.25 1st Qu.:71.00 bandung : 44 PEREMPUAN:247
## Median :175.50 Median :72.00 jakarta 3: 44
## Mean :175.50 Mean :72.47 denpasar : 28
## 3rd Qu.:262.75 3rd Qu.:74.00 jakarta 1: 24
## Max. :350.00 Max. :74.00 jakarta 2: 19
## (Other) :139
## Status_Sipil Usia Pendidikan Jurusan Grade
## Min. :0.00000 Min. :20.00 D3: 66 MARKETING:116 EXECUTIVE: 3
## 1st Qu.:0.00000 1st Qu.:23.00 D4: 1 BISNIS : 83 JUNIOR : 15
## Median :0.00000 Median :24.00 S1:282 BAHASA : 40 MT :319
## Mean :0.06857 Mean :25.13 S2: 1 AKUNTANSI: 36 SENIOR : 13
## 3rd Qu.:0.00000 3rd Qu.:27.00 EKONOMI : 30
## Max. :1.00000 Max. :37.00 HUKUM : 22
## (Other) : 23
## Pengalaman_Kerja
## Belum memiliki pengalaman kerja :134
## Memiliki pengalaman <2 tahun : 1
## Memiliki Pengalaman kerja < 1 tahun : 73
## Memiliki Pengalaman kerja > 1 tahun : 1
## Memiliki Pengalaman kerja > 2 tahun : 87
## Memiliki Pengalaman kerja 1 - 2 tahun: 54
##
## Pengalaman_di_A_P_K
## Belum memiliki pengalaman kerja:242
## Memiliki Pengalaman < 1 tahun : 34
## Memiliki Pengalaman > 1 tahun : 1
## Memiliki Pengalaman > 2 tahun : 42
## Memiliki Pengalaman 1 - 2 tahun: 31
##
##
## Pengalaman_Sales Pengalaman_Organisasi
## Belum memiliki pengalaman kerja :200 Min. :0.0000
## Memiliki pengalaman < 1 tahun : 67 1st Qu.:1.0000
## Memiliki Pengalaman > 2 tahun : 44 Median :1.0000
## Memiliki Pengalaman 1 - 2 tahun : 37 Mean :0.7829
## Memiliki Pengalaman kerja > 2 tahun: 2 3rd Qu.:1.0000
## Max. :1.0000
##
## Pengurus_Organisasi nilai_training DISC1 DISC2
## Min. :0.0000 Min. :12.00 IS : 43 SC : 63
## 1st Qu.:0.0000 1st Qu.:74.25 Tight : 38 Uppershift: 25
## Median :1.0000 Median :81.00 SC : 33 SIC : 24
## Mean :0.5143 Mean :79.40 IC : 29 SCI : 19
## 3rd Qu.:1.0000 3rd Qu.:84.00 I : 22 DSC : 18
## Max. :1.0000 Max. :97.00 S : 19 CDS : 17
## (Other):166 (Other) :184
## DISC3 accuracy verbal_reasoning working_memory
## Tight : 83 Min. : 49.00 Min. : 46.00 Min. : 57.0
## SC : 36 1st Qu.: 89.50 1st Qu.: 84.00 1st Qu.:103.0
## CS : 22 Median :101.00 Median : 94.00 Median :109.0
## IS : 20 Mean : 97.75 Mean : 94.17 Mean :107.6
## SIC : 19 3rd Qu.:110.00 3rd Qu.:105.00 3rd Qu.:117.0
## CD : 17 Max. :145.00 Max. :155.00 Max. :145.0
## (Other):153
## numerical_reasoning spatial_reasoning Fluid_intelligence arithmetics
## Min. : 57.00 Min. : 69.00 Min. : 63.0 Min. : 0.00
## 1st Qu.: 88.00 1st Qu.: 83.00 1st Qu.: 95.0 1st Qu.: 9.00
## Median :102.00 Median : 88.00 Median :100.0 Median :13.00
## Mean : 98.55 Mean : 92.94 Mean :101.5 Mean :12.01
## 3rd Qu.:104.00 3rd Qu.: 96.00 3rd Qu.:107.0 3rd Qu.:15.00
## Max. :154.00 Max. :166.00 Max. :153.0 Max. :26.00
##
## component word_relation figure_classification crystallized_intelligence
## Min. : 4.0 Min. : 1.00 Min. : 0.00 Min. : 71.00
## 1st Qu.: 8.0 1st Qu.: 8.25 1st Qu.:12.00 1st Qu.: 91.00
## Median :11.0 Median :12.00 Median :16.00 Median :100.00
## Mean :11.7 Mean :12.07 Mean :15.65 Mean : 99.96
## 3rd Qu.:15.0 3rd Qu.:15.00 3rd Qu.:19.00 3rd Qu.:107.00
## Max. :27.0 Max. :24.00 Max. :29.00 Max. :135.00
##
## log_berpikir k_numerikal d_analisa k_verbal o_hasil
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.00 Median :3.000 Median :2.000 Median :3.000 Median :3.000
## Mean :2.72 Mean :2.837 Mean :2.171 Mean :2.846 Mean :3.454
## 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
##
## fleks s_kerja m_prestasi kerjasama k_interpersonal
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :2.00 Min. :2.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.00 Median :3.000
## Mean :2.954 Mean :2.851 Mean :2.917 Mean :3.16 Mean :2.977
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:4.00 3rd Qu.:3.000
## Max. :5.000 Max. :4.000 Max. :5.000 Max. :5.00 Max. :4.000
##
## talent
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3457
## 3rd Qu.:1.0000
## Max. :1.0000
##
dim(d) # dataset size
## [1] 350 40
summary(d) # basic summary stats
## No Batch Wilayah Gender
## Min. : 1.00 Min. :71.00 palembang: 52 LAKI-LAKI:103
## 1st Qu.: 88.25 1st Qu.:71.00 bandung : 44 PEREMPUAN:247
## Median :175.50 Median :72.00 jakarta 3: 44
## Mean :175.50 Mean :72.47 denpasar : 28
## 3rd Qu.:262.75 3rd Qu.:74.00 jakarta 1: 24
## Max. :350.00 Max. :74.00 jakarta 2: 19
## (Other) :139
## Status_Sipil Usia Pendidikan Jurusan Grade
## Min. :0.00000 Min. :20.00 D3: 66 MARKETING:116 EXECUTIVE: 3
## 1st Qu.:0.00000 1st Qu.:23.00 D4: 1 BISNIS : 83 JUNIOR : 15
## Median :0.00000 Median :24.00 S1:282 BAHASA : 40 MT :319
## Mean :0.06857 Mean :25.13 S2: 1 AKUNTANSI: 36 SENIOR : 13
## 3rd Qu.:0.00000 3rd Qu.:27.00 EKONOMI : 30
## Max. :1.00000 Max. :37.00 HUKUM : 22
## (Other) : 23
## Pengalaman_Kerja
## Belum memiliki pengalaman kerja :134
## Memiliki pengalaman <2 tahun : 1
## Memiliki Pengalaman kerja < 1 tahun : 73
## Memiliki Pengalaman kerja > 1 tahun : 1
## Memiliki Pengalaman kerja > 2 tahun : 87
## Memiliki Pengalaman kerja 1 - 2 tahun: 54
##
## Pengalaman_di_A_P_K
## Belum memiliki pengalaman kerja:242
## Memiliki Pengalaman < 1 tahun : 34
## Memiliki Pengalaman > 1 tahun : 1
## Memiliki Pengalaman > 2 tahun : 42
## Memiliki Pengalaman 1 - 2 tahun: 31
##
##
## Pengalaman_Sales Pengalaman_Organisasi
## Belum memiliki pengalaman kerja :200 Min. :0.0000
## Memiliki pengalaman < 1 tahun : 67 1st Qu.:1.0000
## Memiliki Pengalaman > 2 tahun : 44 Median :1.0000
## Memiliki Pengalaman 1 - 2 tahun : 37 Mean :0.7829
## Memiliki Pengalaman kerja > 2 tahun: 2 3rd Qu.:1.0000
## Max. :1.0000
##
## Pengurus_Organisasi nilai_training DISC1 DISC2
## Min. :0.0000 Min. :12.00 IS : 43 SC : 63
## 1st Qu.:0.0000 1st Qu.:74.25 Tight : 38 Uppershift: 25
## Median :1.0000 Median :81.00 SC : 33 SIC : 24
## Mean :0.5143 Mean :79.40 IC : 29 SCI : 19
## 3rd Qu.:1.0000 3rd Qu.:84.00 I : 22 DSC : 18
## Max. :1.0000 Max. :97.00 S : 19 CDS : 17
## (Other):166 (Other) :184
## DISC3 accuracy verbal_reasoning working_memory
## Tight : 83 Min. : 49.00 Min. : 46.00 Min. : 57.0
## SC : 36 1st Qu.: 89.50 1st Qu.: 84.00 1st Qu.:103.0
## CS : 22 Median :101.00 Median : 94.00 Median :109.0
## IS : 20 Mean : 97.75 Mean : 94.17 Mean :107.6
## SIC : 19 3rd Qu.:110.00 3rd Qu.:105.00 3rd Qu.:117.0
## CD : 17 Max. :145.00 Max. :155.00 Max. :145.0
## (Other):153
## numerical_reasoning spatial_reasoning Fluid_intelligence arithmetics
## Min. : 57.00 Min. : 69.00 Min. : 63.0 Min. : 0.00
## 1st Qu.: 88.00 1st Qu.: 83.00 1st Qu.: 95.0 1st Qu.: 9.00
## Median :102.00 Median : 88.00 Median :100.0 Median :13.00
## Mean : 98.55 Mean : 92.94 Mean :101.5 Mean :12.01
## 3rd Qu.:104.00 3rd Qu.: 96.00 3rd Qu.:107.0 3rd Qu.:15.00
## Max. :154.00 Max. :166.00 Max. :153.0 Max. :26.00
##
## component word_relation figure_classification crystallized_intelligence
## Min. : 4.0 Min. : 1.00 Min. : 0.00 Min. : 71.00
## 1st Qu.: 8.0 1st Qu.: 8.25 1st Qu.:12.00 1st Qu.: 91.00
## Median :11.0 Median :12.00 Median :16.00 Median :100.00
## Mean :11.7 Mean :12.07 Mean :15.65 Mean : 99.96
## 3rd Qu.:15.0 3rd Qu.:15.00 3rd Qu.:19.00 3rd Qu.:107.00
## Max. :27.0 Max. :24.00 Max. :29.00 Max. :135.00
##
## log_berpikir k_numerikal d_analisa k_verbal o_hasil
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000
## Median :3.00 Median :3.000 Median :2.000 Median :3.000 Median :3.000
## Mean :2.72 Mean :2.837 Mean :2.171 Mean :2.846 Mean :3.454
## 3rd Qu.:3.00 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
##
## fleks s_kerja m_prestasi kerjasama k_interpersonal
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :2.00 Min. :2.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:3.000
## Median :3.000 Median :3.000 Median :3.000 Median :3.00 Median :3.000
## Mean :2.954 Mean :2.851 Mean :2.917 Mean :3.16 Mean :2.977
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:4.00 3rd Qu.:3.000
## Max. :5.000 Max. :4.000 Max. :5.000 Max. :5.00 Max. :4.000
##
## talent
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3457
## 3rd Qu.:1.0000
## Max. :1.0000
##
str(d)
## 'data.frame': 350 obs. of 40 variables:
## $ No : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Batch : int 71 71 71 71 71 71 71 71 71 71 ...
## $ Wilayah : Factor w/ 20 levels "aceh","bandung",..: 1 1 1 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "LAKI-LAKI","PEREMPUAN": 2 2 2 1 1 2 2 2 2 2 ...
## $ Status_Sipil : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Usia : int 24 22 21 32 30 23 22 24 22 23 ...
## $ Pendidikan : Factor w/ 4 levels "D3","D4","S1",..: 3 1 1 3 3 1 3 3 3 3 ...
## $ Jurusan : Factor w/ 8 levels "AGRIBISNIS","AKUNTANSI",..: 7 4 4 7 7 7 7 7 7 2 ...
## $ Grade : Factor w/ 4 levels "EXECUTIVE","JUNIOR",..: 4 4 4 2 3 3 3 3 3 3 ...
## $ Pengalaman_Kerja : Factor w/ 6 levels "Belum memiliki pengalaman kerja",..: 3 3 1 6 3 6 1 3 1 1 ...
## $ Pengalaman_di_A_P_K : Factor w/ 5 levels "Belum memiliki pengalaman kerja",..: 1 1 1 4 1 1 1 1 1 1 ...
## $ Pengalaman_Sales : Factor w/ 5 levels "Belum memiliki pengalaman kerja",..: 1 2 1 1 1 4 1 1 1 1 ...
## $ Pengalaman_Organisasi : int 0 1 1 1 1 0 1 1 1 1 ...
## $ Pengurus_Organisasi : int 0 1 0 1 1 0 1 0 1 1 ...
## $ nilai_training : int 85 73 79 83 81 72 83 83 85 97 ...
## $ DISC1 : Factor w/ 31 levels "C","CD","CDI",..: 23 26 22 8 26 28 28 30 19 30 ...
## $ DISC2 : Factor w/ 35 levels "CD","CDI","CDS",..: 35 28 19 6 6 25 25 18 16 25 ...
## $ DISC3 : Factor w/ 35 levels "C","CD","CDI",..: 22 24 34 8 8 25 25 5 34 25 ...
## $ accuracy : int 99 93 67 103 127 101 127 122 110 97 ...
## $ verbal_reasoning : int 105 119 80 80 84 73 101 101 130 101 ...
## $ working_memory : int 126 114 110 114 106 95 117 129 127 115 ...
## $ numerical_reasoning : int 104 65 92 96 81 104 100 120 85 100 ...
## $ spatial_reasoning : int 101 89 79 96 98 84 89 74 149 83 ...
## $ Fluid_intelligence : int 107 96 123 98 99 111 107 109 120 99 ...
## $ arithmetics : int 24 13 17 13 19 15 19 18 20 11 ...
## $ component : int 9 9 5 16 13 11 13 13 17 18 ...
## $ word_relation : int 11 10 19 20 22 12 19 18 16 14 ...
## $ figure_classification : int 17 12 7 14 17 13 22 9 22 17 ...
## $ crystallized_intelligence: int 104 93 97 112 116 101 119 109 122 108 ...
## $ log_berpikir : int 4 3 4 3 3 2 4 4 5 3 ...
## $ k_numerikal : int 4 2 3 3 3 4 4 5 3 3 ...
## $ d_analisa : int 2 1 1 2 3 2 3 2 4 2 ...
## $ k_verbal : int 3 4 3 3 3 2 4 4 5 4 ...
## $ o_hasil : int 4 4 4 4 4 4 4 4 3 3 ...
## $ fleks : int 3 3 3 2 3 3 4 3 2 3 ...
## $ s_kerja : int 3 4 2 4 4 3 4 3 2 3 ...
## $ m_prestasi : int 3 3 2 3 3 2 2 3 5 2 ...
## $ kerjasama : int 4 2 4 3 3 3 3 4 3 2 ...
## $ k_interpersonal : int 3 3 4 3 3 3 3 3 3 3 ...
## $ talent : int 0 0 1 0 1 1 1 1 1 1 ...
colnames(d)
## [1] "No" "Batch"
## [3] "Wilayah" "Gender"
## [5] "Status_Sipil" "Usia"
## [7] "Pendidikan" "Jurusan"
## [9] "Grade" "Pengalaman_Kerja"
## [11] "Pengalaman_di_A_P_K" "Pengalaman_Sales"
## [13] "Pengalaman_Organisasi" "Pengurus_Organisasi"
## [15] "nilai_training" "DISC1"
## [17] "DISC2" "DISC3"
## [19] "accuracy" "verbal_reasoning"
## [21] "working_memory" "numerical_reasoning"
## [23] "spatial_reasoning" "Fluid_intelligence"
## [25] "arithmetics" "component"
## [27] "word_relation" "figure_classification"
## [29] "crystallized_intelligence" "log_berpikir"
## [31] "k_numerikal" "d_analisa"
## [33] "k_verbal" "o_hasil"
## [35] "fleks" "s_kerja"
## [37] "m_prestasi" "kerjasama"
## [39] "k_interpersonal" "talent"
# Eight-colour palette shared by the plots in this document.
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)
# Mean of the 0/1 `talent` flag (i.e. the talent proportion) per Wilayah.
agg_wilayah <- aggregate(talent ~ Wilayah, data = d, FUN = mean)
print(agg_wilayah)
## Wilayah talent
## 1 aceh 0.3333333
## 2 bandung 0.4318182
## 3 banjarmasin 0.3333333
## 4 banjarmasin a 0.7142857
## 5 banjarmasin b 0.0000000
## 6 denpasar 0.1785714
## 7 jakarta 1 0.5833333
## 8 jakarta 2 0.6315789
## 9 jakarta 3 0.2272727
## 10 jayapura 0.3750000
## 11 makassar 0.3846154
## 12 malang 0.2500000
## 13 manado 0.4444444
## 14 medan 0.4444444
## 15 padang 0.7058824
## 16 palembang 0.1923077
## 17 pekanbaru 0.4000000
## 18 semarang 0.1428571
## 19 surabaya 0.0000000
## 20 yogyakarta 0.2352941
# Horizontal bar chart of the talent proportion per Wilayah.
p_wilayah <- ggplot(data = agg_wilayah, mapping = aes(x = Wilayah, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Talent Rate by Wilayah", x = "Wilayah", y = "Proportion Talent") +
  coord_flip()
# Talent proportion per Gender.
agg_sex <- aggregate(talent ~ Gender, data = d, FUN = mean)
print(agg_sex)
## Gender talent
## 1 LAKI-LAKI 0.3786408
## 2 PEREMPUAN 0.3319838
# Bar chart of the talent proportion per Gender.
p_sex <- ggplot(data = agg_sex, mapping = aes(x = Gender, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Talent by Gender", x = "Gender", y = "Proportion Talent")
# Talent proportion per age (Usia).
agg_usia <- aggregate(talent ~ Usia, data = d, FUN = mean)
print(agg_usia)
## Usia talent
## 1 20 0.0000000
## 2 21 0.3333333
## 3 22 0.3958333
## 4 23 0.3968254
## 5 24 0.2777778
## 6 25 0.4500000
## 7 26 0.4333333
## 8 27 0.2333333
## 9 28 0.2352941
## 10 29 0.2500000
## 11 30 0.3333333
## 12 31 0.1428571
## 13 32 0.4285714
## 14 33 0.0000000
## 15 35 0.4000000
## 16 37 0.0000000
# Bar chart of the talent proportion per age (Usia).
p_usia <- ggplot(data = agg_usia, mapping = aes(x = Usia, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Talent Rate by Usia", x = "Usia", y = "Proportion Talent")
# Talent proportion per Status_Sipil value (0/1).
agg_Status_Sipil <- aggregate(talent ~ Status_Sipil, data = d, FUN = mean)
print(agg_Status_Sipil)
## Status_Sipil talent
## 1 0 0.3435583
## 2 1 0.3750000
# Horizontal bar chart of the talent proportion per Status_Sipil value.
p_Status_Sipil <- ggplot(
  data = agg_Status_Sipil,
  mapping = aes(x = Status_Sipil, y = talent)
) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Talent Rate by Status Pernikahan", x = "Status", y = "Proportion Talent") +
  coord_flip()
# Talent proportion per Grade.
agg_Grade <- aggregate(talent ~ Grade, data = d, FUN = mean)
print(agg_Grade)
## Grade talent
## 1 EXECUTIVE 0.0000000
## 2 JUNIOR 0.4000000
## 3 MT 0.3448276
## 4 SENIOR 0.3846154
# Bar chart of the talent proportion per Grade.
p_Grade <- ggplot(data = agg_Grade, mapping = aes(x = Grade, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Talent Rate by Grade", x = "Grade", y = "Proportion Talent")
Aspek Pernikahan dan Grade Belum menunjukkan probabilitas yang signifikan
library(grid)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Side-by-side panels: Wilayah next to Usia, then marital status next to Grade.
grid.arrange(p_wilayah, p_usia, ncol = 2, top = "Wilayah dan Usia")
# The title names both panels shown (it previously mentioned only marital status).
grid.arrange(p_Status_Sipil, p_Grade, ncol = 2, top = "Status Pernikahan dan Grade")
Probabilitas talent yang signifikan di wilayah Padang dan Banjarmasin a, berusia 25 dan 32 tahun
agg_Pendidikan <- aggregate(talent ~ Pendidikan, data = d, mean)
print(agg_Pendidikan)
## Pendidikan talent
## 1 D3 0.3181818
## 2 D4 0.0000000
## 3 S1 0.3510638
## 4 S2 1.0000000
# Bar chart of the talent proportion per education level (Pendidikan).
p_pendidikan <- ggplot(data = agg_Pendidikan, mapping = aes(x = Pendidikan, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Pendidikan", x = "Pendidikan", y = "Proportion Talent")
# Talent proportion per Pengalaman_Kerja category.
agg_Pengalaman_Kerja <- aggregate(talent ~ Pengalaman_Kerja, data = d, FUN = mean)
print(agg_Pengalaman_Kerja)
## Pengalaman_Kerja talent
## 1 Belum memiliki pengalaman kerja 0.3656716
## 2 Memiliki pengalaman <2 tahun 0.0000000
## 3 Memiliki Pengalaman kerja < 1 tahun 0.3424658
## 4 Memiliki Pengalaman kerja > 1 tahun 0.0000000
## 5 Memiliki Pengalaman kerja > 2 tahun 0.2988506
## 6 Memiliki Pengalaman kerja 1 - 2 tahun 0.3888889
# Horizontal bar chart of the talent proportion per Pengalaman_Kerja category.
p_pengalaman_kerja <- ggplot(
  data = agg_Pengalaman_Kerja,
  mapping = aes(x = Pengalaman_Kerja, y = talent)
) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Pengalaman Kerja", x = "Pengalaman_Kerja", y = "Proportion Talent") +
  coord_flip()
# Talent proportion per Pengalaman_di_A_P_K category.
agg_Pengalaman_di_A_P_K <- aggregate(talent ~ Pengalaman_di_A_P_K, data = d, FUN = mean)
print(agg_Pengalaman_di_A_P_K)
## Pengalaman_di_A_P_K talent
## 1 Belum memiliki pengalaman kerja 0.3719008
## 2 Memiliki Pengalaman < 1 tahun 0.3529412
## 3 Memiliki Pengalaman > 1 tahun 0.0000000
## 4 Memiliki Pengalaman > 2 tahun 0.2142857
## 5 Memiliki Pengalaman 1 - 2 tahun 0.3225806
# Horizontal bar chart of the talent proportion per Pengalaman_di_A_P_K category.
p_Pengalaman_di_A_P_K <- ggplot(
  data = agg_Pengalaman_di_A_P_K,
  mapping = aes(x = Pengalaman_di_A_P_K, y = talent)
) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(
    title = "Pengalaman di bidang Keuangan",
    x = "Pengalaman di bidang Keuangan",
    y = "Proportion Talent"
  ) +
  coord_flip()
# Talent proportion per Pengalaman_Sales category.
agg_Pengalaman_Sales <- aggregate(talent ~ Pengalaman_Sales, data = d, FUN = mean)
print(agg_Pengalaman_Sales)
## Pengalaman_Sales talent
## 1 Belum memiliki pengalaman kerja 0.3800000
## 2 Memiliki pengalaman < 1 tahun 0.2835821
## 3 Memiliki Pengalaman > 2 tahun 0.2272727
## 4 Memiliki Pengalaman 1 - 2 tahun 0.4054054
## 5 Memiliki Pengalaman kerja > 2 tahun 0.5000000
# Horizontal bar chart of the talent proportion per Pengalaman_Sales category.
p_Pengalaman_Sales <- ggplot(
  data = agg_Pengalaman_Sales,
  mapping = aes(x = Pengalaman_Sales, y = talent)
) +
  geom_bar(stat = "identity", fill = cbPalette[3]) +
  labs(title = "Pengalaman Sales", x = "Pengalaman Sales", y = "Proportion Talent") +
  coord_flip()
# 2-column grid of the education and the three experience charts.
grid.arrange(
  p_pendidikan,
  p_pengalaman_kerja,
  p_Pengalaman_di_A_P_K,
  p_Pengalaman_Sales,
  ncol = 2,
  top = "Grafik Probabilitas talent berdasarkan Pengalaman"
)
Probabilitas yang bisa menjadi talent: pendidikan S2, memiliki pengalaman kerja > 2 tahun
agg_grafik1 <- aggregate(talent ~ DISC1, data = d, mean)
print(agg_grafik1)
## DISC1 talent
## 1 C 0.4285714
## 2 CD 0.3076923
## 3 CDI 0.5000000
## 4 CDS 0.2500000
## 5 CI 0.4285714
## 6 CID 0.2500000
## 7 CIS 0.3333333
## 8 CS 0.2352941
## 9 CSI 0.5000000
## 10 D 0.3333333
## 11 DC 0.2857143
## 12 DCS 0.0000000
## 13 DI 0.0000000
## 14 DS 0.3333333
## 15 I 0.4090909
## 16 IC 0.3793103
## 17 ICD 0.2000000
## 18 ICS 0.4705882
## 19 ID 0.3846154
## 20 IDC 0.5000000
## 21 IDS 0.5000000
## 22 IS 0.3953488
## 23 ISC 0.3846154
## 24 ISD 0.0000000
## 25 S 0.3157895
## 26 SC 0.3636364
## 27 SD 0.0000000
## 28 SI 0.2857143
## 29 SIC 0.7500000
## 30 Tight 0.2894737
## 31 Uppershift 0.2500000
# Horizontal bar chart of the talent proportion per DISC1 profile.
p1DISC <- ggplot(data = agg_grafik1, mapping = aes(x = DISC1, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[4]) +
  labs(title = "Talent Rate by DISC1", x = "DISC1", y = "Proportion Talent") +
  coord_flip()
# Talent proportion per DISC2 profile.
agg_grafik2 <- aggregate(talent ~ DISC2, data = d, FUN = mean)
print(agg_grafik2)
## DISC2 talent
## 1 CD 0.2000000
## 2 CDI 0.5000000
## 3 CDS 0.2941176
## 4 CID 0.0000000
## 5 CIS 0.6666667
## 6 CS 0.3846154
## 7 CSD 0.3076923
## 8 CSI 0.2500000
## 9 DC 0.2000000
## 10 DCI 1.0000000
## 11 DCS 0.2857143
## 12 DI 0.3333333
## 13 DIC 0.3333333
## 14 DIS 0.3333333
## 15 DS 1.0000000
## 16 DSC 0.3333333
## 17 DSI 0.3333333
## 18 IC 0.6000000
## 19 ICS 0.6153846
## 20 ID 0.7500000
## 21 IDS 0.4000000
## 22 IS 0.5000000
## 23 ISC 0.5000000
## 24 ISD 0.2000000
## 25 SC 0.4920635
## 26 SCD 0.2222222
## 27 SCI 0.2631579
## 28 SD 0.2857143
## 29 SDC 0.1818182
## 30 SDI 0.0000000
## 31 SI 0.2500000
## 32 SIC 0.1666667
## 33 SID 0.2500000
## 34 Tight 0.1111111
## 35 Uppershift 0.1600000
# Horizontal bar chart of the talent proportion per DISC2 profile.
# The y-axis label is "Proportion Talent" like the DISC1/DISC3 charts: the
# plotted value is the mean of `talent`, not a sales figure.
p2DISC <- ggplot(agg_grafik2, aes(x = DISC2, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[4]) +
  ggtitle("Talent Rate by DISC2") +
  labs(y = "Proportion Talent", x = "DISC2") +
  coord_flip()
# Talent proportion per DISC3 profile.
agg_grafik3 <- aggregate(talent ~ DISC3, data = d, mean)
print(agg_grafik3)
## DISC3 talent
## 1 C 0.4285714
## 2 CD 0.2941176
## 3 CDI 0.4285714
## 4 CDS 0.0000000
## 5 CI 0.5000000
## 6 CID 0.0000000
## 7 CIS 0.3333333
## 8 CS 0.4090909
## 9 CSD 0.1666667
## 10 CSI 0.5000000
## 11 D 0.6666667
## 12 DC 0.0000000
## 13 DI 0.0000000
## 14 DSC 0.3333333
## 15 DSI 0.0000000
## 16 I 0.5000000
## 17 IC 0.7272727
## 18 ID 0.3333333
## 19 IDC 0.5000000
## 20 IDS 0.0000000
## 21 IS 0.5000000
## 22 ISC 0.2500000
## 23 ISD 0.0000000
## 24 S 0.3636364
## 25 SC 0.3888889
## 26 SCD 0.6666667
## 27 SCI 0.2500000
## 28 SD 0.6666667
## 29 SDC 0.0000000
## 30 SDI 1.0000000
## 31 SI 0.1538462
## 32 SIC 0.3684211
## 33 SID 0.3333333
## 34 Tight 0.2650602
## 35 Uppershift 0.5000000
# Horizontal bar chart of the talent proportion per DISC3 profile.
p3DISC <- ggplot(data = agg_grafik3, mapping = aes(x = DISC3, y = talent)) +
  geom_bar(stat = "identity", fill = cbPalette[4]) +
  labs(title = "Talent Rate by DISC3", x = "DISC3", y = "Proportion Talent") +
  coord_flip()
# Three DISC charts side by side.
grid.arrange(
  p1DISC, p2DISC, p3DISC,
  ncol = 3,
  top = "Probabilitas talent berdasarkan DISC"
)
Profil karakteristik yang memiliki probabilitas sebagai talent: Grafik 1: SIC, Grafik2: DS, DCI, Grafik 3:SDI
# Draw the two intelligence boxplots side by side. par() returns the previous
# settings, which are restored afterwards instead of assuming the layout was 1x1.
old_par <- par(mfrow = c(1, 2))
# Fluid_intelligence split by the 0/1 talent flag.
bp_Average.GTQ <- boxplot(Fluid_intelligence ~ talent,
  data = d,
  main = "Talent by Fluid Intelligence",
  xlab = "talent",
  ylab = "Fluid_intelligence",
  col = "green",
  border = "black"
)
# crystallized_intelligence split by the 0/1 talent flag.
bp_IQ <- boxplot(crystallized_intelligence ~ talent,
  data = d,
  main = "Talent by Crystalized Intelligence",
  xlab = "talent",
  ylab = "crystallized_intelligence",
  col = "green",
  border = "black"
)
par(old_par)
Tidak terdapat perbedaan yang signifikan tentang talent bila dilihat dari aspek crystalized intelligence maupun fluid intelligence
# Fix the RNG seed so the split is reproducible.
set.seed(42)
# Logical mask from caTools::sample.split: TRUE rows (ratio 2/3) go to train.
spl <- sample.split(d$talent, SplitRatio = 2/3)
train <- d[spl, ]
test <- d[!spl, ]
# Talent share in the training partition.
mean(train$talent)
## [1] 0.3461538
mean(test$talent)
## [1] 0.3448276
# Logistic regression of `talent` on the training score, the ability-test
# scores and the competency ratings, fitted on the training partition.
m1 <- glm(
  talent ~ nilai_training + accuracy + verbal_reasoning + working_memory +
    numerical_reasoning + spatial_reasoning + Fluid_intelligence +
    arithmetics + component + word_relation + figure_classification +
    crystallized_intelligence + log_berpikir + k_numerikal + d_analisa +
    k_verbal + o_hasil + fleks + s_kerja + m_prestasi + kerjasama +
    k_interpersonal,
  data = train,
  family = 'binomial'
)
summary(m1) # coefficient table and fit statistics
##
## Call:
## glm(formula = talent ~ nilai_training + accuracy + verbal_reasoning +
## working_memory + numerical_reasoning + spatial_reasoning +
## Fluid_intelligence + arithmetics + component + word_relation +
## figure_classification + crystallized_intelligence + log_berpikir +
## k_numerikal + d_analisa + k_verbal + o_hasil + fleks + s_kerja +
## m_prestasi + kerjasama + k_interpersonal, family = "binomial",
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7046 -0.7251 -0.4441 0.3008 2.4684
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.349224 11.514351 -1.246 0.21269
## nilai_training -0.060279 0.028750 -2.097 0.03602 *
## accuracy -0.060332 0.052807 -1.143 0.25325
## verbal_reasoning -0.045515 0.053518 -0.850 0.39507
## working_memory -0.066637 0.050519 -1.319 0.18715
## numerical_reasoning -0.064424 0.056662 -1.137 0.25555
## spatial_reasoning -0.113342 0.053677 -2.112 0.03472 *
## Fluid_intelligence 0.430496 0.248305 1.734 0.08296 .
## arithmetics -0.130177 0.152025 -0.856 0.39184
## component -0.296031 0.199982 -1.480 0.13880
## word_relation -0.195623 0.187285 -1.045 0.29624
## figure_classification -0.193048 0.117435 -1.644 0.10020
## crystallized_intelligence 0.263249 0.180943 1.455 0.14570
## log_berpikir -1.186260 0.568164 -2.088 0.03681 *
## k_numerikal -0.061875 0.441221 -0.140 0.88847
## d_analisa 0.383579 0.433120 0.886 0.37582
## k_verbal -0.028420 0.408386 -0.070 0.94452
## o_hasil 0.167659 0.354128 0.473 0.63590
## fleks -0.596721 0.340282 -1.754 0.07950 .
## s_kerja -0.333964 0.330652 -1.010 0.31249
## m_prestasi -0.974329 0.317992 -3.064 0.00218 **
## kerjasama 0.470379 0.344093 1.367 0.17162
## k_interpersonal -0.006434 0.418714 -0.015 0.98774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 301.88 on 233 degrees of freedom
## Residual deviance: 202.97 on 211 degrees of freedom
## AIC: 248.97
##
## Number of Fisher Scoring iterations: 9
# Refit the same logistic regression specification as m1, but on the `test`
# partition instead of `train`.
# NOTE(review): because m2 is estimated on `test`, any prediction it makes for
# `test` rows is in-sample and does not measure out-of-sample accuracy.
m2 <- glm(talent ~ nilai_training + accuracy+verbal_reasoning+working_memory+numerical_reasoning+spatial_reasoning+Fluid_intelligence+arithmetics+component+word_relation+figure_classification+crystallized_intelligence+log_berpikir+k_numerikal+d_analisa+k_verbal+o_hasil+fleks+s_kerja+m_prestasi+kerjasama+k_interpersonal, data = test, family = 'binomial')
summary(m2) # getting model results
##
## Call:
## glm(formula = talent ~ nilai_training + accuracy + verbal_reasoning +
## working_memory + numerical_reasoning + spatial_reasoning +
## Fluid_intelligence + arithmetics + component + word_relation +
## figure_classification + crystallized_intelligence + log_berpikir +
## k_numerikal + d_analisa + k_verbal + o_hasil + fleks + s_kerja +
## m_prestasi + kerjasama + k_interpersonal, family = "binomial",
## data = test)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5788 -0.6693 -0.4355 0.3908 2.7060
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.69809 13.47873 -0.349 0.72742
## nilai_training -0.10797 0.03985 -2.709 0.00674 **
## accuracy -0.04742 0.04499 -1.054 0.29189
## verbal_reasoning -0.06822 0.05162 -1.322 0.18630
## working_memory -0.04312 0.04916 -0.877 0.38041
## numerical_reasoning -0.02511 0.05093 -0.493 0.62203
## spatial_reasoning -0.06898 0.04753 -1.451 0.14674
## Fluid_intelligence 0.29212 0.21544 1.356 0.17513
## arithmetics 0.04558 0.17447 0.261 0.79392
## component -0.25838 0.22827 -1.132 0.25769
## word_relation -0.23420 0.24456 -0.958 0.33825
## figure_classification -0.09208 0.14341 -0.642 0.52084
## crystallized_intelligence 0.19839 0.21424 0.926 0.35445
## log_berpikir -1.16083 0.81553 -1.423 0.15462
## k_numerikal -0.68794 0.60060 -1.145 0.25203
## d_analisa -0.03532 0.56986 -0.062 0.95057
## k_verbal 1.19091 0.60375 1.973 0.04855 *
## o_hasil 0.16181 0.59219 0.273 0.78467
## fleks 0.11949 0.48992 0.244 0.80732
## s_kerja -0.35769 0.52299 -0.684 0.49402
## m_prestasi -0.78704 0.43361 -1.815 0.06951 .
## kerjasama 0.24680 0.49966 0.494 0.62135
## k_interpersonal -0.56775 0.58176 -0.976 0.32910
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 149.451 on 115 degrees of freedom
## Residual deviance: 99.819 on 93 degrees of freedom
## AIC: 145.82
##
## Number of Fisher Scoring iterations: 9
# Histogram of m1's fitted probabilities (training rows), with the 0.5 cutoff
# marked in red. `border = FALSE` is spelled out because `F` can be reassigned.
hist(m1$fitted.values, main = "Distribution of Predicted Probabilities",
     xlab = "Probability of Talents", col = cbPalette[3], border = FALSE, breaks = 50)
abline(v = .5, col = "red", lwd = 3)
# Share of training rows at or above the 0.5 cutoff.
prop.table(table(m1$fitted.values >= .5))
##
## FALSE TRUE
## 0.7692308 0.2307692
prop.table(table(m1$fitted.values >= .3))
##
## FALSE TRUE
## 0.5726496 0.4273504
# Score the held-out test partition with m1, the model fitted on `train`, so
# the probabilities are out-of-sample. Predicting with m2 would score the very
# rows m2 was fitted on (m2 uses data = test).
# NOTE(review): the console output recorded in this document was produced with
# the m2-based predictions and will change when this is re-run.
m2_test <- predict(m1, newdata = test, type = "response")
hist(m2_test, main = "Distribution of Test Set \nPredicted Probabilities",
     xlab = "Probability of Talents", col = cbPalette[3], border = FALSE, breaks = 50)
abline(v = .5, col = "red", lwd = 3)
# Share of test rows at or above the 0.5 cutoff.
prop.table(table(m2_test >= .5))
##
## FALSE TRUE
## 0.7672414 0.2327586
prop.table(table(m2_test>= .3))
##
## FALSE TRUE
## 0.5603448 0.4396552
To check accuracy, we’ll build a “confusion matrix”. This allows us to compare the observed results from our test set with our predicted results.
We will set the cutoff at .5 so that anyone with a value over .5 will be categorized as talent.
prop.table(table(test$talent))
##
## 0 1
## 0.6551724 0.3448276
# Confusion matrix at the 0.5 cutoff: rows = predicted (> .5), columns = observed.
accuracy <- table(m2_test > .5, test$talent)
# Same matrix with row/column totals added.
addmargins(accuracy)
##
## 0 1 Sum
## FALSE 71 18 89
## TRUE 5 22 27
## Sum 76 40 116
To get the model accuracy, we will sum the correctly classified observations (values on the diagonal) and divide by the total number of observations (116). This will give us the proportion correct.
sum(diag(accuracy))/sum(accuracy)
## [1] 0.8017241
addmargins(accuracy)
##
## 0 1 Sum
## FALSE 71 18 89
## TRUE 5 22 27
## Sum 76 40 116
acc2 <- table(m2_test > .3, test$talent) # confusion matrix
addmargins(acc2)
##
## 0 1 Sum
## FALSE 58 7 65
## TRUE 18 33 51
## Sum 76 40 116
The opt.cut helper below is adapted from https://www.r-bloggers.com/a-small-introduction-to-the-rocr-package/
# Find the point on a curve that lies closest to the corner (x = 0, y = 1).
#
# For every run stored in the inputs, the squared distance of each (x, y)
# point to (0, 1) is computed and the nearest point is reported.
#
# perf: S4 object with list slots @x.values and @y.values (visROC passes a
#       ROCR "tpr" vs "fpr" performance object, so x is the false positive
#       rate and y the true positive rate).
# pred: S4 object with a list slot @cutoffs, parallel to the values in perf.
#
# Returns a matrix with rows "sensitivity", "specificity" and "cutoff" and
# one column per run.
opt.cut <- function(perf, pred) {
  mapply(FUN = function(x, y, p) {
    dist_sq <- (x - 0)^2 + (y - 1)^2
    # which.min() always yields a single index. The previous
    # which(d == min(d)) returned several indices when points tied for the
    # minimum, which made the [[ ]] extractions below fail.
    ind <- which.min(dist_sq)
    c(sensitivity = y[[ind]], specificity = 1 - x[[ind]],
      cutoff = p[[ind]])
  }, perf@x.values, perf@y.values, pred@cutoffs)
}
http://rocr.bioinf.mpi-sb.mpg.de/
# Plot a ROC curve for a fitted model and annotate it.
#
# model:   fitted model object accepted by predict(..., type = "response").
# data:    data frame to score; must contain the outcome column.
# outcome: name (string) of the observed-label column in `data`.
#
# Draws the curve, the diagonal reference line, the optimum cutoff returned by
# opt.cut(), and text labels for the AUC and that cutoff. Called for its
# plotting side effect.
visROC <- function(model, data, outcome) {
  # Core three steps from ROCR
  temp_pred <- predict(model, newdata = data, type = "response")
  # [[ ]] always extracts the column as a plain vector (also for tibbles).
  roc_pred <- prediction(temp_pred, data[[outcome]]) # create an S4 ROC object
  roc_perf <- performance(roc_pred, "tpr", "fpr")

  # make the plot
  plot(roc_perf, colorize = TRUE, print.cutoffs.at = seq(0, 1, .1),
       main = "ROC Curve", lwd = 2)
  abline(coef = c(0, 1), col = "black", lwd = 2)

  # get the optimum cutoff; opt holds sensitivity, specificity, cutoff in
  # positions 1, 2, 3
  opt <- opt.cut(roc_perf, roc_pred)
  points(x = 1 - opt[2], y = opt[1], pch = 19, col = "red", cex = 1.5)
  text(x = 1 - opt[2], y = opt[1] + .05, labels = "Optimum Cutoff")

  # Area Under the Curve
  auc <- as.numeric(performance(roc_pred, "auc")@y.values)
  text(x = .6, y = .3,
       labels = paste("Area Under the Curve:\n", round(auc, 2)))
  text(x = .6, y = .15,
       labels = paste("Optimum Cutoff:\n", round(opt[3], 3)))
}
# Draw the annotated ROC curve for model m1 on the `test` data.
# NOTE(review): the test-set predictions earlier in this script come from m2,
# while this call evaluates m1 -- confirm which model is intended.
visROC(m1, test, "talent")
library(Boruta)
## Loading required package: ranger
##
## Attaching package: 'ranger'
## The following object is masked from 'package:rattle':
##
## importance
# List the column names of d; the first two (No, Batch) are left out of the
# feature-selection data built from d afterwards.
colnames(d)
## [1] "No" "Batch"
## [3] "Wilayah" "Gender"
## [5] "Status_Sipil" "Usia"
## [7] "Pendidikan" "Jurusan"
## [9] "Grade" "Pengalaman_Kerja"
## [11] "Pengalaman_di_A_P_K" "Pengalaman_Sales"
## [13] "Pengalaman_Organisasi" "Pengurus_Organisasi"
## [15] "nilai_training" "DISC1"
## [17] "DISC2" "DISC3"
## [19] "accuracy" "verbal_reasoning"
## [21] "working_memory" "numerical_reasoning"
## [23] "spatial_reasoning" "Fluid_intelligence"
## [25] "arithmetics" "component"
## [27] "word_relation" "figure_classification"
## [29] "crystallized_intelligence" "log_berpikir"
## [31] "k_numerikal" "d_analisa"
## [33] "k_verbal" "o_hasil"
## [35] "fleks" "s_kerja"
## [37] "m_prestasi" "kerjasama"
## [39] "k_interpersonal" "talent"
# Feature-selection input: every column of d except No and Batch.
# The columns are excluded by name rather than by position (3:40), so the
# selection stays correct if columns are added or reordered. d is already a
# data frame, so no data.frame() wrapper is needed.
data <- d[, !(colnames(d) %in% c("No", "Batch")), drop = FALSE]
colnames(data)
## [1] "Wilayah" "Gender"
## [3] "Status_Sipil" "Usia"
## [5] "Pendidikan" "Jurusan"
## [7] "Grade" "Pengalaman_Kerja"
## [9] "Pengalaman_di_A_P_K" "Pengalaman_Sales"
## [11] "Pengalaman_Organisasi" "Pengurus_Organisasi"
## [13] "nilai_training" "DISC1"
## [15] "DISC2" "DISC3"
## [17] "accuracy" "verbal_reasoning"
## [19] "working_memory" "numerical_reasoning"
## [21] "spatial_reasoning" "Fluid_intelligence"
## [23] "arithmetics" "component"
## [25] "word_relation" "figure_classification"
## [27] "crystallized_intelligence" "log_berpikir"
## [29] "k_numerikal" "d_analisa"
## [31] "k_verbal" "o_hasil"
## [33] "fleks" "s_kerja"
## [35] "m_prestasi" "kerjasama"
## [37] "k_interpersonal" "talent"
# Boruta is a randomized procedure, so fix the seed to make the
# confirmed/rejected decisions reproducible between runs.
# NOTE(review): if talent is stored as numeric 0/1 rather than a factor, the
# importance source presumably treats this as regression -- confirm that this
# is intended.
set.seed(42)
fit <- Boruta(talent ~ ., data, doTrace = 2)
## 1. run of importance source...
## 2. run of importance source...
## 3. run of importance source...
## 4. run of importance source...
## 5. run of importance source...
## 6. run of importance source...
## 7. run of importance source...
## 8. run of importance source...
## 9. run of importance source...
## 10. run of importance source...
## 11. run of importance source...
## 12. run of importance source...
## After 12 iterations, +3.6 secs:
## confirmed 4 attributes: DISC2, Fluid_intelligence, nilai_training, working_memory;
## rejected 23 attributes: arithmetics, component, crystallized_intelligence, DISC1, DISC3 and 18 more;
## still have 10 attributes left.
## 13. run of importance source...
## 14. run of importance source...
## 15. run of importance source...
## 16. run of importance source...
## After 16 iterations, +4.1 secs:
## rejected 1 attribute: kerjasama;
## still have 9 attributes left.
## 17. run of importance source...
## 18. run of importance source...
## 19. run of importance source...
## 20. run of importance source...
## After 20 iterations, +4.5 secs:
## rejected 1 attribute: k_numerikal;
## still have 8 attributes left.
## 21. run of importance source...
## 22. run of importance source...
## 23. run of importance source...
## 24. run of importance source...
## 25. run of importance source...
## 26. run of importance source...
## 27. run of importance source...
## 28. run of importance source...
## 29. run of importance source...
## 30. run of importance source...
## 31. run of importance source...
## 32. run of importance source...
## 33. run of importance source...
## 34. run of importance source...
## 35. run of importance source...
## 36. run of importance source...
## 37. run of importance source...
## 38. run of importance source...
## 39. run of importance source...
## 40. run of importance source...
## 41. run of importance source...
## 42. run of importance source...
## 43. run of importance source...
## 44. run of importance source...
## 45. run of importance source...
## 46. run of importance source...
## 47. run of importance source...
## 48. run of importance source...
## 49. run of importance source...
## 50. run of importance source...
## 51. run of importance source...
## 52. run of importance source...
## 53. run of importance source...
## 54. run of importance source...
## After 54 iterations, +7.7 secs:
## confirmed 1 attribute: spatial_reasoning;
## still have 7 attributes left.
## 55. run of importance source...
## 56. run of importance source...
## 57. run of importance source...
## After 57 iterations, +8 secs:
## confirmed 1 attribute: Wilayah;
## still have 6 attributes left.
## 58. run of importance source...
## 59. run of importance source...
## 60. run of importance source...
## 61. run of importance source...
## 62. run of importance source...
## 63. run of importance source...
## 64. run of importance source...
## 65. run of importance source...
## 66. run of importance source...
## 67. run of importance source...
## 68. run of importance source...
## 69. run of importance source...
## 70. run of importance source...
## After 70 iterations, +9.2 secs:
## confirmed 1 attribute: accuracy;
## still have 5 attributes left.
## 71. run of importance source...
## 72. run of importance source...
## 73. run of importance source...
## 74. run of importance source...
## 75. run of importance source...
## 76. run of importance source...
## 77. run of importance source...
## 78. run of importance source...
## 79. run of importance source...
## 80. run of importance source...
## 81. run of importance source...
## 82. run of importance source...
## 83. run of importance source...
## 84. run of importance source...
## 85. run of importance source...
## 86. run of importance source...
## 87. run of importance source...
## 88. run of importance source...
## 89. run of importance source...
## 90. run of importance source...
## After 90 iterations, +11 secs:
## rejected 1 attribute: m_prestasi;
## still have 4 attributes left.
## 91. run of importance source...
## 92. run of importance source...
## 93. run of importance source...
## 94. run of importance source...
## 95. run of importance source...
## 96. run of importance source...
## 97. run of importance source...
## 98. run of importance source...
## 99. run of importance source...
# Print the Boruta summary: counts of confirmed, rejected and tentative
# attributes.
print(fit)
## Boruta performed 99 iterations in 12.11418 secs.
## 7 attributes confirmed important: accuracy, DISC2, Fluid_intelligence,
## nilai_training, spatial_reasoning and 2 more;
## 26 attributes confirmed unimportant: arithmetics, component,
## crystallized_intelligence, DISC1, DISC3 and 21 more;
## 4 tentative attributes left: d_analisa, log_berpikir,
## numerical_reasoning, verbal_reasoning;
# Plot the Boruta importance history without the default x axis, then draw a
# custom axis with vertical attribute names.
plot(fit, xlab = "", xaxt = "n")
# Finite importance values per column of ImpHistory.
lz <- lapply(seq_len(ncol(fit$ImpHistory)), function(i) {
  fit$ImpHistory[is.finite(fit$ImpHistory[, i]), i]
})
names(lz) <- colnames(fit$ImpHistory)
# Order the names by median importance.
# NOTE(review): assumes the plot orders its boxes the same way -- confirm.
Labels <- sort(vapply(lz, median, numeric(1)))
axis(side = 1, las = 2, labels = names(Labels),
     at = seq_len(ncol(fit$ImpHistory)), cex.axis = 0.7)
# Force a decision on the attributes Boruta left as tentative, then print the
# updated summary.
final.boruta <- TentativeRoughFix(fit)
print(final.boruta)
## Boruta performed 99 iterations in 12.11418 secs.
## Tentatives roughfixed over the last 99 iterations.
## 8 attributes confirmed important: accuracy, d_analisa, DISC2,
## Fluid_intelligence, nilai_training and 3 more;
## 29 attributes confirmed unimportant: arithmetics, component,
## crystallized_intelligence, DISC1, DISC3 and 24 more;
# Names of the attributes kept after the rough fix (tentative ones excluded).
getSelectedAttributes(final.boruta, withTentative = FALSE)
## [1] "Wilayah" "nilai_training" "DISC2"
## [4] "accuracy" "working_memory" "spatial_reasoning"
## [7] "Fluid_intelligence" "d_analisa"
# Per-attribute importance statistics and final decision as a data frame.
boruta.df <- attStats(final.boruta)
class(boruta.df)
## [1] "data.frame"
print(boruta.df)
## meanImp medianImp minImp maxImp
## Wilayah 4.281520447 4.28176280 0.6918961 7.2543200
## Gender -0.169367506 -0.25521955 -1.2694324 1.2017799
## Status_Sipil -0.673423209 -0.61081244 -1.4507007 0.2873675
## Usia -0.008671456 0.15089719 -1.9624946 2.9368297
## Pendidikan 0.182483350 0.09819087 -1.9308064 2.0958115
## Jurusan -1.175227814 -1.22980422 -2.9496595 0.9785804
## Grade -0.916919520 -1.19857690 -2.4374981 1.3835051
## Pengalaman_Kerja -0.034506684 0.01868893 -0.8107963 1.3385181
## Pengalaman_di_A_P_K 0.158282198 0.29034962 -2.1282892 2.0206998
## Pengalaman_Sales -0.304632685 -0.12687778 -1.9232845 2.1701491
## Pengalaman_Organisasi 0.122814795 0.01294493 -1.4521552 1.7099437
## Pengurus_Organisasi -0.426232779 -0.46312997 -2.1784278 1.3701429
## nilai_training 13.021707637 13.19883285 7.6039629 15.7464303
## DISC1 -0.038063229 0.42288754 -3.0668216 1.4650638
## DISC2 6.924835197 6.79236182 4.0070215 10.2941573
## DISC3 1.023129908 0.93594216 -0.2980497 2.6295339
## accuracy 4.047673243 4.03121128 1.0404078 6.9752057
## verbal_reasoning 3.535111408 3.64633670 1.0924636 5.5505223
## working_memory 9.432672704 9.63651049 5.4826078 12.0420903
## numerical_reasoning 2.428104603 2.31668763 -0.4732549 5.1840780
## spatial_reasoning 4.844048995 4.82840404 1.6756522 8.2779908
## Fluid_intelligence 25.430979416 26.06524806 16.4980934 29.5839571
## arithmetics -0.379232810 -0.21226138 -2.9517659 1.0746969
## component 0.404852696 0.56973541 -1.6680758 1.9543246
## word_relation 0.043312125 0.07436470 -1.4035494 1.5065074
## figure_classification 0.496912877 0.93683268 -2.9157186 2.3127940
## crystallized_intelligence 0.223424235 -0.25115699 -1.5130874 1.9481885
## log_berpikir 2.109388140 2.15034720 -0.5151150 4.2780964
## k_numerikal 0.418949346 0.26857232 -1.9337006 2.2632692
## d_analisa 2.512005692 2.52609972 -0.8904858 4.8810567
## k_verbal 0.670101360 0.72164183 -1.9458157 2.5788766
## o_hasil -0.272192534 -0.02734580 -2.4752752 0.5699778
## fleks 0.362203372 0.33199919 -1.3579975 1.4166128
## s_kerja -0.842515752 -0.83675328 -2.7561931 0.3570894
## m_prestasi 1.935109034 1.91436200 -1.3556900 4.8761480
## kerjasama 0.402254421 0.43774888 -1.9781594 3.1067588
## k_interpersonal -0.172292304 -0.25113370 -2.0422500 1.7123710
## normHits decision
## Wilayah 0.81818182 Confirmed
## Gender 0.00000000 Rejected
## Status_Sipil 0.00000000 Rejected
## Usia 0.00000000 Rejected
## Pendidikan 0.00000000 Rejected
## Jurusan 0.00000000 Rejected
## Grade 0.00000000 Rejected
## Pengalaman_Kerja 0.00000000 Rejected
## Pengalaman_di_A_P_K 0.00000000 Rejected
## Pengalaman_Sales 0.00000000 Rejected
## Pengalaman_Organisasi 0.00000000 Rejected
## Pengurus_Organisasi 0.00000000 Rejected
## nilai_training 1.00000000 Confirmed
## DISC1 0.00000000 Rejected
## DISC2 0.98989899 Confirmed
## DISC3 0.00000000 Rejected
## accuracy 0.74747475 Confirmed
## verbal_reasoning 0.63636364 Rejected
## working_memory 1.00000000 Confirmed
## numerical_reasoning 0.45454545 Rejected
## spatial_reasoning 0.84848485 Confirmed
## Fluid_intelligence 1.00000000 Confirmed
## arithmetics 0.00000000 Rejected
## component 0.00000000 Rejected
## word_relation 0.00000000 Rejected
## figure_classification 0.00000000 Rejected
## crystallized_intelligence 0.00000000 Rejected
## log_berpikir 0.36363636 Rejected
## k_numerikal 0.02020202 Rejected
## d_analisa 0.46464646 Confirmed
## k_verbal 0.00000000 Rejected
## o_hasil 0.00000000 Rejected
## fleks 0.00000000 Rejected
## s_kerja 0.00000000 Rejected
## m_prestasi 0.28282828 Rejected
## kerjasama 0.01010101 Rejected
## k_interpersonal 0.00000000 Rejected
set.seed(42)
# Classification tree for talent, fitted on the training data.
# Note: this reuses the name `fit`, replacing the Boruta result stored earlier.
fit <- rpart(
  talent ~ nilai_training + accuracy + verbal_reasoning + working_memory +
    numerical_reasoning + spatial_reasoning + Fluid_intelligence +
    arithmetics + component + word_relation + figure_classification +
    crystallized_intelligence + log_berpikir + k_numerikal + d_analisa +
    k_verbal + o_hasil + fleks + s_kerja + m_prestasi + kerjasama +
    k_interpersonal,
  data = train,
  method = "class"
)
fit # basic model results
## n= 234
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 234 81 0 (0.65384615 0.34615385)
## 2) Fluid_intelligence< 94.5 57 5 0 (0.91228070 0.08771930) *
## 3) Fluid_intelligence>=94.5 177 76 0 (0.57062147 0.42937853)
## 6) working_memory>=98 157 58 0 (0.63057325 0.36942675)
## 12) spatial_reasoning>=100 43 8 0 (0.81395349 0.18604651) *
## 13) spatial_reasoning< 100 114 50 0 (0.56140351 0.43859649)
## 26) Fluid_intelligence< 105.5 88 32 0 (0.63636364 0.36363636)
## 52) working_memory>=107 59 15 0 (0.74576271 0.25423729)
## 104) fleks>=3.5 11 0 0 (1.00000000 0.00000000) *
## 105) fleks< 3.5 48 15 0 (0.68750000 0.31250000)
## 210) component>=14.5 11 1 0 (0.90909091 0.09090909) *
## 211) component< 14.5 37 14 0 (0.62162162 0.37837838)
## 422) spatial_reasoning< 90 29 8 0 (0.72413793 0.27586207)
## 844) verbal_reasoning< 89 8 0 0 (1.00000000 0.00000000) *
## 845) verbal_reasoning>=89 21 8 0 (0.61904762 0.38095238)
## 1690) verbal_reasoning>=99.5 12 2 0 (0.83333333 0.16666667) *
## 1691) verbal_reasoning< 99.5 9 3 1 (0.33333333 0.66666667) *
## 423) spatial_reasoning>=90 8 2 1 (0.25000000 0.75000000) *
## 53) working_memory< 107 29 12 1 (0.41379310 0.58620690)
## 106) k_numerikal>=2.5 19 8 0 (0.57894737 0.42105263) *
## 107) k_numerikal< 2.5 10 1 1 (0.10000000 0.90000000) *
## 27) Fluid_intelligence>=105.5 26 8 1 (0.30769231 0.69230769)
## 54) verbal_reasoning>=103 15 7 0 (0.53333333 0.46666667) *
## 55) verbal_reasoning< 103 11 0 1 (0.00000000 1.00000000) *
## 7) working_memory< 98 20 2 1 (0.10000000 0.90000000) *
# Set the plot margins (bottom, left, top, right) before drawing the tree.
par(mar = c(5,4,1,2)) #setting the margins
fancyRpartPlot(fit, sub = NULL, main = "Final Decision Tree")
# Class probabilities for the rows of `test`: one column per class (0 and 1).
fit_test <- predict(fit, newdata= test, type = "prob")
head(fit_test)
## 0 1
## 1 0.8139535 0.18604651
## 2 0.8333333 0.16666667
## 9 0.8139535 0.18604651
## 10 0.9090909 0.09090909
## 12 0.2500000 0.75000000
## 14 0.1000000 0.90000000
We’ll need to use the second column when creating our confusion matrix.
# Observed class shares in the test data, as a baseline for the accuracy below.
prop.table(table(test$talent))
##
## 0 1
## 0.6551724 0.3448276
# Confusion matrix for the tree: rows are "probability of class 1 above 0.5",
# columns are the observed talent label. Build it once and reuse it.
accuracy <- table(fit_test[, 2] > .5, test$talent)
addmargins(accuracy)
##
## 0 1 Sum
## FALSE 61 20 81
## TRUE 15 20 35
## Sum 76 40 116
# Overall accuracy of the tree at the 0.5 cutoff.
sum(diag(accuracy))/ sum(accuracy) # correct predictions (diagonal) / total observations
## [1] 0.6982759
Semoga bermanfaat.