Package
Silakan install jika belum ada
# Install the packages used in this tutorial, skipping any that are already
# present so that re-running the script does not reinstall everything.
cran_packages <- c(
  "tidyverse", "mlr3verse", "ranger", "rpart.plot", "RWeka", "precrec"
)
installed <- rownames(installed.packages())
missing_packages <- setdiff(cran_packages, installed)
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# mlr3extralearners is installed from GitHub (mlr-org/mlr3extralearners);
# this call needs the remotes package to be available.
if (!"mlr3extralearners" %in% installed) {
  remotes::install_github("mlr-org/mlr3extralearners")
}
Memanggil Package
Import data ke R
## checking_status duration credit_history purpose
## 1 '<0' 6 'critical/other existing credit' radio/tv
## 2 '0<=X<200' 48 'existing paid' radio/tv
## 3 'no checking' 12 'critical/other existing credit' education
## 4 '<0' 42 'existing paid' furniture/equipment
## 5 '<0' 24 'delayed previously' 'new car'
## 6 'no checking' 36 'existing paid' education
## credit_amount savings_status employment installment_commitment
## 1 1169 'no known savings' '>=7' 4
## 2 5951 '<100' '1<=X<4' 2
## 3 2096 '<100' '4<=X<7' 2
## 4 7882 '<100' '4<=X<7' 2
## 5 4870 '<100' '1<=X<4' 3
## 6 9055 'no known savings' '1<=X<4' 2
## personal_status other_parties residence_since property_magnitude age
## 1 'male single' none 4 'real estate' 67
## 2 'female div/dep/mar' none 2 'real estate' 22
## 3 'male single' none 3 'real estate' 49
## 4 'male single' guarantor 4 'life insurance' 45
## 5 'male single' none 4 'no known property' 53
## 6 'male single' none 4 'no known property' 35
## other_payment_plans housing existing_credits job
## 1 none own 2 skilled
## 2 none own 1 skilled
## 3 none own 1 'unskilled resident'
## 4 none 'for free' 1 skilled
## 5 none 'for free' 2 skilled
## 6 none 'for free' 1 'unskilled resident'
## num_dependents own_telephone foreign_worker class
## 1 1 yes yes good
## 2 1 none yes bad
## 3 2 none yes good
## 4 2 none yes good
## 5 2 none yes bad
## 6 2 yes yes good
mengubah tipe data integer ke numerik
# Read the airfoil self-noise file from the working directory and label its
# columns in a single step, then preview the first rows.
data_airfoil <- setNames(
  read.table("airfoil_self_noise.dat"),
  c(
    "Frequency", "Angle_of_Attack", "Chord_length",
    "Free_stream_velocity", "Suction_side", "Scaled_sound"
  )
)
head(data_airfoil)
## Frequency Angle_of_Attack Chord_length Free_stream_velocity Suction_side
## 1 800 0 0.3048 71.3 0.00266337
## 2 1000 0 0.3048 71.3 0.00266337
## 3 1250 0 0.3048 71.3 0.00266337
## 4 1600 0 0.3048 71.3 0.00266337
## 5 2000 0 0.3048 71.3 0.00266337
## 6 2500 0 0.3048 71.3 0.00266337
## Scaled_sound
## 1 126.201
## 2 125.201
## 3 125.951
## 4 127.591
## 5 127.461
## 6 125.571
Import data ke ekosistem mlr3
Menentukan model yang digunakan
- Bagging
# Bagging pipeline for classification: 100 iterations of an rpart
# classification tree that predicts probabilities, combined by the
# "classifavg" averager.
model_bagging_classif <- pipeline_bagging(
  lrn("classif.rpart", predict_type = "prob"),
  iterations = 100,
  averager = po("classifavg")
)
# Bagging pipeline for regression: 100 iterations of an rpart regression
# tree, combined by the "regravg" averager.
model_bagging_regr <- pipeline_bagging(
  lrn("regr.rpart"),
  iterations = 100,
  averager = po("regravg")
)
- Random Forest
- Ada Boost
Melakukan interpretasi model (jika diperlukan)
## age checking_status credit_amount
## 31.445540 38.991956 43.563684
## credit_history duration employment
## 19.245259 32.637067 14.073071
## existing_credits foreign_worker housing
## 5.831078 1.538162 8.256197
## installment_commitment job num_dependents
## 11.431669 7.932523 3.797136
## other_parties other_payment_plans own_telephone
## 5.986914 9.693468 4.747957
## personal_status property_magnitude purpose
## 8.864974 10.998645 17.537670
## residence_since savings_status
## 10.075722 14.481703
## Angle_of_Attack Chord_length Free_stream_velocity
## 7184.127 8450.686 2975.771
## Frequency Suction_side
## 29066.666 17613.132
# Put the variable-importance scores stored in model_rf_classif into a
# two-column data frame (predictor name, score). unname() drops the vector
# names so the data frame gets default row names.
importance_classif <- data.frame(
  Predictors = names(model_rf_classif$model$variable.importance),
  impurity = unname(model_rf_classif$model$variable.importance)
)
# Show the predictors ordered from the highest score to the lowest.
arrange(importance_classif, desc(impurity))
## Predictors impurity
## 1 credit_amount 43.563684
## 2 checking_status 38.991956
## 3 duration 32.637067
## 4 age 31.445540
## 5 credit_history 19.245259
## 6 purpose 17.537670
## 7 savings_status 14.481703
## 8 employment 14.073071
## 9 installment_commitment 11.431669
## 10 property_magnitude 10.998645
## 11 residence_since 10.075722
## 12 other_payment_plans 9.693468
## 13 personal_status 8.864974
## 14 housing 8.256197
## 15 job 7.932523
## 16 other_parties 5.986914
## 17 existing_credits 5.831078
## 18 own_telephone 4.747957
## 19 num_dependents 3.797136
## 20 foreign_worker 1.538162
# Put the variable-importance scores stored in model_rf_regr into a
# two-column data frame (predictor name, score). unname() drops the vector
# names so the data frame gets default row names.
importance_regr <- data.frame(
  Predictors = names(model_rf_regr$model$variable.importance),
  variance = unname(model_rf_regr$model$variable.importance)
)
# Show the predictors ordered from the highest score to the lowest.
arrange(importance_regr, desc(variance))
## Predictors variance
## 1 Frequency 29066.666
## 2 Suction_side 17613.132
## 3 Chord_length 8450.686
## 4 Angle_of_Attack 7184.127
## 5 Free_stream_velocity 2975.771
Komparasi model
Komparasi model bisa dilakukan dengan menggunakan fungsi benchmark_grid dan benchmark. Fungsi benchmark_grid digunakan untuk memasukkan informasi-informasi yang dibutuhkan untuk komparasi, seperti data yang digunakan (tasks), model yang ingin dikomparasi (learners) dan metode pembagian data yang digunakan (resamplings).
# Benchmark design for the credit task: pairs task_credit with the learners
# in learner_credit under the resample_cv resampling.
design_classif <- benchmark_grid(
  tasks = task_credit,
  learners = learner_credit,
  resamplings = resample_cv
)
# Benchmark design for the airfoil task: pairs task_airfoil with the learners
# in learner_airfoil under the same resample_cv resampling.
design_regr <- benchmark_grid(
  tasks = task_airfoil,
  learners = learner_airfoil,
  resamplings = resample_cv
)
Kemudian fungsi benchmark digunakan untuk menjalankan/ running komparasi model berdasarkan desain yang sudah dirancang.
Karena terdapat 5 model (3 model klasifikasi dan 2 model regresi) dan masing-masing model menjalankan 10-fold cross-validation, maka iterasi yang dilakukan ada sebanyak 50 kali.
Hasil Komparasi model
Hasil komparasi model dapat berupa nilai-nilai ukuran kebaikan model yang ditentukan oleh pengguna.
## nr resample_result task_id
## 1: 1 <ResampleResult[21]> credit
## 2: 2 <ResampleResult[21]> credit
## 3: 3 <ResampleResult[21]> credit
## learner_id
## 1: subsample_1.subsample_2.subsample_3.subsample_4.subsample_5.subsample_6.subsample_7.subsample_8.subsample_9.subsample_10.subsample_11.subsample_12.subsample_13.subsample_14.subsample_15.subsample_16.subsample_17.subsample_18.subsample_19.subsample_20.subsample_21.subsample_22.subsample_23.subsample_24.subsample_25.subsample_26.subsample_27.subsample_28.subsample_29.subsample_30.subsample_31.subsample_32.subsample_33.subsample_34.subsample_35.subsample_36.subsample_37.subsample_38.subsample_39.subsample_40.subsample_41.subsample_42.subsample_43.subsample_44.subsample_45.subsample_46.subsample_47.subsample_48.subsample_49.subsample_50.subsample_51.subsample_52.subsample_53.subsample_54.subsample_55.subsample_56.subsample_57.subsample_58.subsample_59.subsample_60.subsample_61.subsample_62.subsample_63.subsample_64.subsample_65.subsample_66.subsample_67.subsample_68.subsample_69.subsample_70.subsample_71.subsample_72.subsample_73.subsample_74.subsample_75.subsample_76.subsample_77.subsample_78.subsample_79.subsample_80.subsample_81.subsample_82.subsample_83.subsample_84.subsample_85.subsample_86.subsample_87.subsample_88.subsample_89.subsample_90.subsample_91.subsample_92.subsample_93.subsample_94.subsample_95.subsample_96.subsample_97.subsample_98.subsample_99.subsample_100.classif.rpart_1.classif.rpart_2.classif.rpart_3.classif.rpart_4.classif.rpart_5.classif.rpart_6.classif.rpart_7.classif.rpart_8.classif.rpart_9.classif.rpart_10.classif.rpart_11.classif.rpart_12.classif.rpart_13.classif.rpart_14.classif.rpart_15.classif.rpart_16.classif.rpart_17.classif.rpart_18.classif.rpart_19.classif.rpart_20.classif.rpart_21.classif.rpart_22.classif.rpart_23.classif.rpart_24.classif.rpart_25.classif.rpart_26.classif.rpart_27.classif.rpart_28.classif.rpart_29.classif.rpart_30.classif.rpart_31.classif.rpart_32.classif.rpart_33.classif.rpart_34.classif.rpart_35.classif.rpart_36.classif.rpart_37.classif.rpart_38.classif.rpart_39.classif.rpart_40.classif.rpart_41.classif.rpart_
42.classif.rpart_43.classif.rpart_44.classif.rpart_45.classif.rpart_46.classif.rpart_47.classif.rpart_48.classif.rpart_49.classif.rpart_50.classif.rpart_51.classif.rpart_52.classif.rpart_53.classif.rpart_54.classif.rpart_55.classif.rpart_56.classif.rpart_57.classif.rpart_58.classif.rpart_59.classif.rpart_60.classif.rpart_61.classif.rpart_62.classif.rpart_63.classif.rpart_64.classif.rpart_65.classif.rpart_66.classif.rpart_67.classif.rpart_68.classif.rpart_69.classif.rpart_70.classif.rpart_71.classif.rpart_72.classif.rpart_73.classif.rpart_74.classif.rpart_75.classif.rpart_76.classif.rpart_77.classif.rpart_78.classif.rpart_79.classif.rpart_80.classif.rpart_81.classif.rpart_82.classif.rpart_83.classif.rpart_84.classif.rpart_85.classif.rpart_86.classif.rpart_87.classif.rpart_88.classif.rpart_89.classif.rpart_90.classif.rpart_91.classif.rpart_92.classif.rpart_93.classif.rpart_94.classif.rpart_95.classif.rpart_96.classif.rpart_97.classif.rpart_98.classif.rpart_99.classif.rpart_100.classifavg
## 2: classif.ranger
## 3: classif.AdaBoostM1
## resampling_id iters classif.acc classif.auc
## 1: cv 10 0.757 0.7694201
## 2: cv 10 0.757 0.7919533
## 3: cv 10 0.706 0.7257064
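Berdasarkan nilai akurasi, model bagging dan random forest memiliki performa prediksi yang sama baiknya (akurasi 0,757), sedangkan AdaBoost lebih rendah (0,706). Berdasarkan nilai AUC, model dengan performa prediksi terbaik adalah random forest (0,792).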
## nr resample_result task_id
## 1: 1 <ResampleResult[21]> airfoil
## 2: 2 <ResampleResult[21]> airfoil
## learner_id
## 1: subsample_1.subsample_2.subsample_3.subsample_4.subsample_5.subsample_6.subsample_7.subsample_8.subsample_9.subsample_10.subsample_11.subsample_12.subsample_13.subsample_14.subsample_15.subsample_16.subsample_17.subsample_18.subsample_19.subsample_20.subsample_21.subsample_22.subsample_23.subsample_24.subsample_25.subsample_26.subsample_27.subsample_28.subsample_29.subsample_30.subsample_31.subsample_32.subsample_33.subsample_34.subsample_35.subsample_36.subsample_37.subsample_38.subsample_39.subsample_40.subsample_41.subsample_42.subsample_43.subsample_44.subsample_45.subsample_46.subsample_47.subsample_48.subsample_49.subsample_50.subsample_51.subsample_52.subsample_53.subsample_54.subsample_55.subsample_56.subsample_57.subsample_58.subsample_59.subsample_60.subsample_61.subsample_62.subsample_63.subsample_64.subsample_65.subsample_66.subsample_67.subsample_68.subsample_69.subsample_70.subsample_71.subsample_72.subsample_73.subsample_74.subsample_75.subsample_76.subsample_77.subsample_78.subsample_79.subsample_80.subsample_81.subsample_82.subsample_83.subsample_84.subsample_85.subsample_86.subsample_87.subsample_88.subsample_89.subsample_90.subsample_91.subsample_92.subsample_93.subsample_94.subsample_95.subsample_96.subsample_97.subsample_98.subsample_99.subsample_100.regr.rpart_1.regr.rpart_2.regr.rpart_3.regr.rpart_4.regr.rpart_5.regr.rpart_6.regr.rpart_7.regr.rpart_8.regr.rpart_9.regr.rpart_10.regr.rpart_11.regr.rpart_12.regr.rpart_13.regr.rpart_14.regr.rpart_15.regr.rpart_16.regr.rpart_17.regr.rpart_18.regr.rpart_19.regr.rpart_20.regr.rpart_21.regr.rpart_22.regr.rpart_23.regr.rpart_24.regr.rpart_25.regr.rpart_26.regr.rpart_27.regr.rpart_28.regr.rpart_29.regr.rpart_30.regr.rpart_31.regr.rpart_32.regr.rpart_33.regr.rpart_34.regr.rpart_35.regr.rpart_36.regr.rpart_37.regr.rpart_38.regr.rpart_39.regr.rpart_40.regr.rpart_41.regr.rpart_42.regr.rpart_43.regr.rpart_44.regr.rpart_45.regr.rpart_46.regr.rpart_47.regr.rpart_48.regr.rpart_49.regr.rpart_50.regr.rpart_
51.regr.rpart_52.regr.rpart_53.regr.rpart_54.regr.rpart_55.regr.rpart_56.regr.rpart_57.regr.rpart_58.regr.rpart_59.regr.rpart_60.regr.rpart_61.regr.rpart_62.regr.rpart_63.regr.rpart_64.regr.rpart_65.regr.rpart_66.regr.rpart_67.regr.rpart_68.regr.rpart_69.regr.rpart_70.regr.rpart_71.regr.rpart_72.regr.rpart_73.regr.rpart_74.regr.rpart_75.regr.rpart_76.regr.rpart_77.regr.rpart_78.regr.rpart_79.regr.rpart_80.regr.rpart_81.regr.rpart_82.regr.rpart_83.regr.rpart_84.regr.rpart_85.regr.rpart_86.regr.rpart_87.regr.rpart_88.regr.rpart_89.regr.rpart_90.regr.rpart_91.regr.rpart_92.regr.rpart_93.regr.rpart_94.regr.rpart_95.regr.rpart_96.regr.rpart_97.regr.rpart_98.regr.rpart_99.regr.rpart_100.regravg
## 2: regr.ranger
## resampling_id iters regr.mse regr.srho
## 1: cv 10 14.488646 0.8169416
## 2: cv 10 4.802364 0.9478668