#KELOMPOK 5
#Amaany (23031554038)
#Nurin Nasi'ah Salsabila (23031554148)
# LOAD AND EXPLORE DATA ----
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.3
## Loading required package: ggplot2
# Read the cumulative.csv dataset from a local file and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running on another computer.
csv_path <- "C:\\Users\\ASUS\\Downloads\\cumulative.csv (1)\\cumulative.csv"
df_MLR <- read.csv(file = csv_path, header = TRUE, sep = ",")
head(x = df_MLR, n = 5)
## rowid kepid kepoi_name kepler_name koi_disposition koi_pdisposition
## 1 1 10797460 K00752.01 Kepler-227 b CONFIRMED CANDIDATE
## 2 2 10797460 K00752.02 Kepler-227 c CONFIRMED CANDIDATE
## 3 3 10811496 K00753.01 FALSE POSITIVE FALSE POSITIVE
## 4 4 10848459 K00754.01 FALSE POSITIVE FALSE POSITIVE
## 5 5 10854555 K00755.01 Kepler-664 b CONFIRMED CANDIDATE
## koi_score koi_fpflag_nt koi_fpflag_ss koi_fpflag_co koi_fpflag_ec koi_period
## 1 1.000 0 0 0 0 9.488036
## 2 0.969 0 0 0 0 54.418383
## 3 0.000 0 1 0 0 19.899140
## 4 0.000 0 1 0 0 1.736952
## 5 1.000 0 0 0 0 2.525592
## koi_period_err1 koi_period_err2 koi_time0bk koi_time0bk_err1 koi_time0bk_err2
## 1 2.775e-05 -2.775e-05 170.5387 0.002160 -0.002160
## 2 2.479e-04 -2.479e-04 162.5138 0.003520 -0.003520
## 3 1.494e-05 -1.494e-05 175.8503 0.000581 -0.000581
## 4 2.630e-07 -2.630e-07 170.3076 0.000115 -0.000115
## 5 3.761e-06 -3.761e-06 171.5956 0.001130 -0.001130
## koi_impact koi_impact_err1 koi_impact_err2 koi_duration koi_duration_err1
## 1 0.146 0.318 -0.146 2.95750 0.08190
## 2 0.586 0.059 -0.443 4.50700 0.11600
## 3 0.969 5.126 -0.077 1.78220 0.03410
## 4 1.276 0.115 -0.092 2.40641 0.00537
## 5 0.701 0.235 -0.478 1.65450 0.04200
## koi_duration_err2 koi_depth koi_depth_err1 koi_depth_err2 koi_prad
## 1 -0.08190 615.8 19.5 -19.5 2.26
## 2 -0.11600 874.8 35.5 -35.5 2.83
## 3 -0.03410 10829.0 171.0 -171.0 14.60
## 4 -0.00537 8079.2 12.8 -12.8 33.46
## 5 -0.04200 603.3 16.9 -16.9 2.75
## koi_prad_err1 koi_prad_err2 koi_teq koi_teq_err1 koi_teq_err2 koi_insol
## 1 0.26 -0.15 793 NA NA 93.59
## 2 0.32 -0.19 443 NA NA 9.11
## 3 3.92 -1.31 638 NA NA 39.30
## 4 8.50 -2.83 1395 NA NA 891.96
## 5 0.88 -0.35 1406 NA NA 926.16
## koi_insol_err1 koi_insol_err2 koi_model_snr koi_tce_plnt_num
## 1 29.45 -16.65 35.8 1
## 2 2.87 -1.62 25.8 2
## 3 31.04 -10.49 76.3 1
## 4 668.95 -230.35 505.6 1
## 5 874.33 -314.24 40.9 1
## koi_tce_delivname koi_steff koi_steff_err1 koi_steff_err2 koi_slogg
## 1 q1_q17_dr25_tce 5455 81 -81 4.467
## 2 q1_q17_dr25_tce 5455 81 -81 4.467
## 3 q1_q17_dr25_tce 5853 158 -176 4.544
## 4 q1_q17_dr25_tce 5805 157 -174 4.564
## 5 q1_q17_dr25_tce 6031 169 -211 4.438
## koi_slogg_err1 koi_slogg_err2 koi_srad koi_srad_err1 koi_srad_err2 ra
## 1 0.064 -0.096 0.927 0.105 -0.061 291.9342
## 2 0.064 -0.096 0.927 0.105 -0.061 291.9342
## 3 0.044 -0.176 0.868 0.233 -0.078 297.0048
## 4 0.053 -0.168 0.791 0.201 -0.067 285.5346
## 5 0.070 -0.210 1.046 0.334 -0.133 288.7549
## dec koi_kepmag
## 1 48.14165 15.347
## 2 48.14165 15.347
## 3 48.13413 15.436
## 4 48.28521 15.597
## 5 48.22620 15.509
# List the column names of the raw table.
colnames(df_MLR)
## [1] "rowid" "kepid" "kepoi_name"
## [4] "kepler_name" "koi_disposition" "koi_pdisposition"
## [7] "koi_score" "koi_fpflag_nt" "koi_fpflag_ss"
## [10] "koi_fpflag_co" "koi_fpflag_ec" "koi_period"
## [13] "koi_period_err1" "koi_period_err2" "koi_time0bk"
## [16] "koi_time0bk_err1" "koi_time0bk_err2" "koi_impact"
## [19] "koi_impact_err1" "koi_impact_err2" "koi_duration"
## [22] "koi_duration_err1" "koi_duration_err2" "koi_depth"
## [25] "koi_depth_err1" "koi_depth_err2" "koi_prad"
## [28] "koi_prad_err1" "koi_prad_err2" "koi_teq"
## [31] "koi_teq_err1" "koi_teq_err2" "koi_insol"
## [34] "koi_insol_err1" "koi_insol_err2" "koi_model_snr"
## [37] "koi_tce_plnt_num" "koi_tce_delivname" "koi_steff"
## [40] "koi_steff_err1" "koi_steff_err2" "koi_slogg"
## [43] "koi_slogg_err1" "koi_slogg_err2" "koi_srad"
## [46] "koi_srad_err1" "koi_srad_err2" "ra"
## [49] "dec" "koi_kepmag"
# Number of rows, number of columns and column types of the raw table.
NROW(df_MLR)
## [1] 9564
NCOL(df_MLR)
## [1] 50
str(object = df_MLR)
## 'data.frame': 9564 obs. of 50 variables:
## $ rowid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ kepid : int 10797460 10797460 10811496 10848459 10854555 10872983 10872983 10872983 6721123 10910878 ...
## $ kepoi_name : chr "K00752.01" "K00752.02" "K00753.01" "K00754.01" ...
## $ kepler_name : chr "Kepler-227 b" "Kepler-227 c" "" "" ...
## $ koi_disposition : chr "CONFIRMED" "CONFIRMED" "FALSE POSITIVE" "FALSE POSITIVE" ...
## $ koi_pdisposition : chr "CANDIDATE" "CANDIDATE" "FALSE POSITIVE" "FALSE POSITIVE" ...
## $ koi_score : num 1 0.969 0 0 1 1 1 0.992 0 1 ...
## $ koi_fpflag_nt : int 0 0 0 0 0 0 0 0 0 0 ...
## $ koi_fpflag_ss : int 0 0 1 1 0 0 0 0 1 0 ...
## $ koi_fpflag_co : int 0 0 0 0 0 0 0 0 1 0 ...
## $ koi_fpflag_ec : int 0 0 0 0 0 0 0 0 0 0 ...
## $ koi_period : num 9.49 54.42 19.9 1.74 2.53 ...
## $ koi_period_err1 : num 2.78e-05 2.48e-04 1.49e-05 2.63e-07 3.76e-06 ...
## $ koi_period_err2 : num -2.78e-05 -2.48e-04 -1.49e-05 -2.63e-07 -3.76e-06 ...
## $ koi_time0bk : num 171 163 176 170 172 ...
## $ koi_time0bk_err1 : num 0.00216 0.00352 0.000581 0.000115 0.00113 0.00141 0.0019 0.00461 0.00253 0.000517 ...
## $ koi_time0bk_err2 : num -0.00216 -0.00352 -0.000581 -0.000115 -0.00113 -0.00141 -0.0019 -0.00461 -0.00253 -0.000517 ...
## $ koi_impact : num 0.146 0.586 0.969 1.276 0.701 ...
## $ koi_impact_err1 : num 0.318 0.059 5.126 0.115 0.235 ...
## $ koi_impact_err2 : num -0.146 -0.443 -0.077 -0.092 -0.478 -0.428 -0.532 -0.523 -0.044 -0.052 ...
## $ koi_duration : num 2.96 4.51 1.78 2.41 1.65 ...
## $ koi_duration_err1: num 0.0819 0.116 0.0341 0.00537 0.042 0.061 0.0673 0.165 0.136 0.0241 ...
## $ koi_duration_err2: num -0.0819 -0.116 -0.0341 -0.00537 -0.042 -0.061 -0.0673 -0.165 -0.136 -0.0241 ...
## $ koi_depth : num 616 875 10829 8079 603 ...
## $ koi_depth_err1 : num 19.5 35.5 171 12.8 16.9 24.2 18.7 16.8 5.8 33.3 ...
## $ koi_depth_err2 : num -19.5 -35.5 -171 -12.8 -16.9 -24.2 -18.7 -16.8 -5.8 -33.3 ...
## $ koi_prad : num 2.26 2.83 14.6 33.46 2.75 ...
## $ koi_prad_err1 : num 0.26 0.32 3.92 8.5 0.88 1.27 0.9 0.52 6.45 0.22 ...
## $ koi_prad_err2 : num -0.15 -0.19 -1.31 -2.83 -0.35 -0.42 -0.3 -0.17 -9.67 -0.49 ...
## $ koi_teq : num 793 443 638 1395 1406 ...
## $ koi_teq_err1 : logi NA NA NA NA NA NA ...
## $ koi_teq_err2 : logi NA NA NA NA NA NA ...
## $ koi_insol : num 93.59 9.11 39.3 891.96 926.16 ...
## $ koi_insol_err1 : num 29.45 2.87 31.04 668.95 874.33 ...
## $ koi_insol_err2 : num -16.65 -1.62 -10.49 -230.35 -314.24 ...
## $ koi_model_snr : num 35.8 25.8 76.3 505.6 40.9 ...
## $ koi_tce_plnt_num : int 1 2 1 1 1 1 2 3 1 1 ...
## $ koi_tce_delivname: chr "q1_q17_dr25_tce" "q1_q17_dr25_tce" "q1_q17_dr25_tce" "q1_q17_dr25_tce" ...
## $ koi_steff : num 5455 5455 5853 5805 6031 ...
## $ koi_steff_err1 : num 81 81 158 157 169 189 189 189 111 75 ...
## $ koi_steff_err2 : num -81 -81 -176 -174 -211 -232 -232 -232 -124 -83 ...
## $ koi_slogg : num 4.47 4.47 4.54 4.56 4.44 ...
## $ koi_slogg_err1 : num 0.064 0.064 0.044 0.053 0.07 0.054 0.054 0.054 0.182 0.083 ...
## $ koi_slogg_err2 : num -0.096 -0.096 -0.176 -0.168 -0.21 -0.229 -0.229 -0.229 -0.098 -0.028 ...
## $ koi_srad : num 0.927 0.927 0.868 0.791 1.046 ...
## $ koi_srad_err1 : num 0.105 0.105 0.233 0.201 0.334 0.315 0.315 0.315 0.322 0.033 ...
## $ koi_srad_err2 : num -0.061 -0.061 -0.078 -0.067 -0.133 -0.105 -0.105 -0.105 -0.483 -0.072 ...
## $ ra : num 292 292 297 286 289 ...
## $ dec : num 48.1 48.1 48.1 48.3 48.2 ...
## $ koi_kepmag : num 15.3 15.3 15.4 15.6 15.5 ...
# Per-column summary statistics (also reports the NA count of each column).
summary(object = df_MLR)
## rowid kepid kepoi_name kepler_name
## Min. : 1 Min. : 757450 Length:9564 Length:9564
## 1st Qu.:2392 1st Qu.: 5556034 Class :character Class :character
## Median :4782 Median : 7906892 Mode :character Mode :character
## Mean :4782 Mean : 7690628
## 3rd Qu.:7173 3rd Qu.: 9873066
## Max. :9564 Max. :12935144
##
## koi_disposition koi_pdisposition koi_score koi_fpflag_nt
## Length:9564 Length:9564 Min. :0.0000 Min. :0.0000
## Class :character Class :character 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Mode :character Median :0.3340 Median :0.0000
## Mean :0.4808 Mean :0.1882
## 3rd Qu.:0.9980 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000
## NA's :1510
## koi_fpflag_ss koi_fpflag_co koi_fpflag_ec koi_period
## Min. :0.0000 Min. :0.0000 Min. :0.00 Min. : 0.24
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.: 2.73
## Median :0.0000 Median :0.0000 Median :0.00 Median : 9.75
## Mean :0.2316 Mean :0.1949 Mean :0.12 Mean : 75.67
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00 3rd Qu.: 40.72
## Max. :1.0000 Max. :1.0000 Max. :1.00 Max. :129995.78
##
## koi_period_err1 koi_period_err2 koi_time0bk koi_time0bk_err1
## Min. :0.0000 Min. :-0.1725 Min. : 120.5 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:-0.0003 1st Qu.: 132.8 1st Qu.:0.0012
## Median :0.0000 Median : 0.0000 Median : 137.2 Median :0.0041
## Mean :0.0021 Mean :-0.0021 Mean : 166.2 Mean :0.0099
## 3rd Qu.:0.0003 3rd Qu.: 0.0000 3rd Qu.: 170.7 3rd Qu.:0.0105
## Max. :0.1725 Max. : 0.0000 Max. :1472.5 Max. :0.5690
## NA's :454 NA's :454 NA's :454
## koi_time0bk_err2 koi_impact koi_impact_err1 koi_impact_err2
## Min. :-0.5690 Min. : 0.0000 Min. : 0.000 Min. :-59.3200
## 1st Qu.:-0.0105 1st Qu.: 0.1970 1st Qu.: 0.040 1st Qu.: -0.4450
## Median :-0.0041 Median : 0.5370 Median : 0.193 Median : -0.2070
## Mean :-0.0099 Mean : 0.7351 Mean : 1.960 Mean : -0.3326
## 3rd Qu.:-0.0012 3rd Qu.: 0.8890 3rd Qu.: 0.378 3rd Qu.: -0.0460
## Max. : 0.0000 Max. :100.8060 Max. :85.540 Max. : 0.0000
## NA's :454 NA's :363 NA's :454 NA's :454
## koi_duration koi_duration_err1 koi_duration_err2 koi_depth
## Min. : 0.052 Min. : 0.0000 Min. :-20.2000 Min. : 0.0
## 1st Qu.: 2.438 1st Qu.: 0.0508 1st Qu.: -0.3500 1st Qu.: 159.9
## Median : 3.793 Median : 0.1420 Median : -0.1420 Median : 421.1
## Mean : 5.622 Mean : 0.3399 Mean : -0.3399 Mean : 23791.3
## 3rd Qu.: 6.277 3rd Qu.: 0.3500 3rd Qu.: -0.0508 3rd Qu.: 1473.4
## Max. :138.540 Max. :20.2000 Max. : 0.0000 Max. :1541400.0
## NA's :454 NA's :454 NA's :363
## koi_depth_err1 koi_depth_err2 koi_prad koi_prad_err1
## Min. : 0.0 Min. :-388600.0 Min. : 0.08 Min. : 0.00
## 1st Qu.: 9.6 1st Qu.: -49.5 1st Qu.: 1.40 1st Qu.: 0.23
## Median : 20.8 Median : -20.8 Median : 2.39 Median : 0.52
## Mean : 123.2 Mean : -123.2 Mean : 102.89 Mean : 17.66
## 3rd Qu.: 49.5 3rd Qu.: -9.6 3rd Qu.: 14.93 3rd Qu.: 2.32
## Max. :388600.0 Max. : 0.0 Max. :200346.00 Max. :21640.00
## NA's :454 NA's :454 NA's :363 NA's :363
## koi_prad_err2 koi_teq koi_teq_err1 koi_teq_err2
## Min. :-77180.00 Min. : 25 Mode:logical Mode:logical
## 1st Qu.: -1.94 1st Qu.: 539 NA's:9564 NA's:9564
## Median : -0.30 Median : 878
## Mean : -33.02 Mean : 1085
## 3rd Qu.: -0.14 3rd Qu.: 1379
## Max. : 0.00 Max. :14667
## NA's :363 NA's :363
## koi_insol koi_insol_err1 koi_insol_err2 koi_model_snr
## Min. : 0 Min. : 0 Min. :-5600031 Min. : 0.0
## 1st Qu.: 20 1st Qu.: 9 1st Qu.: -287 1st Qu.: 12.0
## Median : 142 Median : 73 Median : -40 Median : 23.0
## Mean : 7746 Mean : 3751 Mean : -4044 Mean : 259.9
## 3rd Qu.: 870 3rd Qu.: 519 3rd Qu.: -5 3rd Qu.: 78.0
## Max. :10947555 Max. :3617133 Max. : 0 Max. :9054.7
## NA's :321 NA's :321 NA's :321 NA's :363
## koi_tce_plnt_num koi_tce_delivname koi_steff koi_steff_err1
## Min. :1.000 Length:9564 Min. : 2661 Min. : 0.0
## 1st Qu.:1.000 Class :character 1st Qu.: 5310 1st Qu.:106.0
## Median :1.000 Mode :character Median : 5767 Median :157.0
## Mean :1.244 Mean : 5707 Mean :144.6
## 3rd Qu.:1.000 3rd Qu.: 6112 3rd Qu.:174.0
## Max. :8.000 Max. :15896 Max. :676.0
## NA's :346 NA's :363 NA's :468
## koi_steff_err2 koi_slogg koi_slogg_err1 koi_slogg_err2
## Min. :-1762.0 Min. :0.047 Min. :0.0000 Min. :-1.2070
## 1st Qu.: -198.0 1st Qu.:4.218 1st Qu.:0.0420 1st Qu.:-0.1960
## Median : -160.0 Median :4.438 Median :0.0700 Median :-0.1280
## Mean : -162.3 Mean :4.310 Mean :0.1207 Mean :-0.1432
## 3rd Qu.: -114.0 3rd Qu.:4.543 3rd Qu.:0.1490 3rd Qu.:-0.0880
## Max. : 0.0 Max. :5.364 Max. :1.4720 Max. : 0.0000
## NA's :483 NA's :363 NA's :468 NA's :468
## koi_srad koi_srad_err1 koi_srad_err2 ra
## Min. : 0.109 Min. : 0.0000 Min. :-116.1370 Min. :279.9
## 1st Qu.: 0.829 1st Qu.: 0.1290 1st Qu.: -0.2500 1st Qu.:288.7
## Median : 1.000 Median : 0.2510 Median : -0.1110 Median :292.3
## Mean : 1.729 Mean : 0.3623 Mean : -0.3948 Mean :292.1
## 3rd Qu.: 1.345 3rd Qu.: 0.3640 3rd Qu.: -0.0690 3rd Qu.:295.9
## Max. :229.908 Max. :33.0910 Max. : 0.0000 Max. :301.7
## NA's :363 NA's :468 NA's :468
## dec koi_kepmag
## Min. :36.58 Min. : 6.966
## 1st Qu.:40.78 1st Qu.:13.440
## Median :43.68 Median :14.520
## Mean :43.81 Mean :14.265
## 3rd Qu.:46.71 3rd Qu.:15.322
## Max. :52.34 Max. :20.003
## NA's :1
# Count missing values per column of the raw table, largest count first,
# and print them as a one-column data frame.
missing_per_column <- colSums(is.na(df_MLR))
na_counts <- sort(missing_per_column, decreasing = TRUE)
na_df <- data.frame(Missing_Values = na_counts)
print(na_df)
## Missing_Values
## koi_teq_err1 9564
## koi_teq_err2 9564
## koi_score 1510
## koi_steff_err2 483
## koi_steff_err1 468
## koi_slogg_err1 468
## koi_slogg_err2 468
## koi_srad_err1 468
## koi_srad_err2 468
## koi_period_err1 454
## koi_period_err2 454
## koi_time0bk_err1 454
## koi_time0bk_err2 454
## koi_impact_err1 454
## koi_impact_err2 454
## koi_duration_err1 454
## koi_duration_err2 454
## koi_depth_err1 454
## koi_depth_err2 454
## koi_impact 363
## koi_depth 363
## koi_prad 363
## koi_prad_err1 363
## koi_prad_err2 363
## koi_teq 363
## koi_model_snr 363
## koi_steff 363
## koi_slogg 363
## koi_srad 363
## koi_tce_plnt_num 346
## koi_insol 321
## koi_insol_err1 321
## koi_insol_err2 321
## koi_kepmag 1
## rowid 0
## kepid 0
## kepoi_name 0
## kepler_name 0
## koi_disposition 0
## koi_pdisposition 0
## koi_fpflag_nt 0
## koi_fpflag_ss 0
## koi_fpflag_co 0
## koi_fpflag_ec 0
## koi_period 0
## koi_time0bk 0
## koi_duration 0
## koi_tce_delivname 0
## ra 0
## dec 0
# Drop variables that are irrelevant or consist entirely of missing values
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remove the two all-NA error columns (koi_teq_err1/2) together with the
# identifier and name columns, then preview the result.
drop_cols <- c(
  "koi_teq_err1", "koi_teq_err2", "kepler_name", "koi_tce_delivname",
  "kepoi_name", "kepid", "rowid"
)
data_new <- dplyr::select(df_MLR, -dplyr::all_of(drop_cols))
head(x = data_new)
## koi_disposition koi_pdisposition koi_score koi_fpflag_nt koi_fpflag_ss
## 1 CONFIRMED CANDIDATE 1.000 0 0
## 2 CONFIRMED CANDIDATE 0.969 0 0
## 3 FALSE POSITIVE FALSE POSITIVE 0.000 0 1
## 4 FALSE POSITIVE FALSE POSITIVE 0.000 0 1
## 5 CONFIRMED CANDIDATE 1.000 0 0
## 6 CONFIRMED CANDIDATE 1.000 0 0
## koi_fpflag_co koi_fpflag_ec koi_period koi_period_err1 koi_period_err2
## 1 0 0 9.488036 2.775e-05 -2.775e-05
## 2 0 0 54.418383 2.479e-04 -2.479e-04
## 3 0 0 19.899140 1.494e-05 -1.494e-05
## 4 0 0 1.736952 2.630e-07 -2.630e-07
## 5 0 0 2.525592 3.761e-06 -3.761e-06
## 6 0 0 11.094321 2.036e-05 -2.036e-05
## koi_time0bk koi_time0bk_err1 koi_time0bk_err2 koi_impact koi_impact_err1
## 1 170.5387 0.002160 -0.002160 0.146 0.318
## 2 162.5138 0.003520 -0.003520 0.586 0.059
## 3 175.8503 0.000581 -0.000581 0.969 5.126
## 4 170.3076 0.000115 -0.000115 1.276 0.115
## 5 171.5956 0.001130 -0.001130 0.701 0.235
## 6 171.2012 0.001410 -0.001410 0.538 0.030
## koi_impact_err2 koi_duration koi_duration_err1 koi_duration_err2 koi_depth
## 1 -0.146 2.95750 0.08190 -0.08190 615.8
## 2 -0.443 4.50700 0.11600 -0.11600 874.8
## 3 -0.077 1.78220 0.03410 -0.03410 10829.0
## 4 -0.092 2.40641 0.00537 -0.00537 8079.2
## 5 -0.478 1.65450 0.04200 -0.04200 603.3
## 6 -0.428 4.59450 0.06100 -0.06100 1517.5
## koi_depth_err1 koi_depth_err2 koi_prad koi_prad_err1 koi_prad_err2 koi_teq
## 1 19.5 -19.5 2.26 0.26 -0.15 793
## 2 35.5 -35.5 2.83 0.32 -0.19 443
## 3 171.0 -171.0 14.60 3.92 -1.31 638
## 4 12.8 -12.8 33.46 8.50 -2.83 1395
## 5 16.9 -16.9 2.75 0.88 -0.35 1406
## 6 24.2 -24.2 3.90 1.27 -0.42 835
## koi_insol koi_insol_err1 koi_insol_err2 koi_model_snr koi_tce_plnt_num
## 1 93.59 29.45 -16.65 35.8 1
## 2 9.11 2.87 -1.62 25.8 2
## 3 39.30 31.04 -10.49 76.3 1
## 4 891.96 668.95 -230.35 505.6 1
## 5 926.16 874.33 -314.24 40.9 1
## 6 114.81 112.85 -36.70 66.5 1
## koi_steff koi_steff_err1 koi_steff_err2 koi_slogg koi_slogg_err1
## 1 5455 81 -81 4.467 0.064
## 2 5455 81 -81 4.467 0.064
## 3 5853 158 -176 4.544 0.044
## 4 5805 157 -174 4.564 0.053
## 5 6031 169 -211 4.438 0.070
## 6 6046 189 -232 4.486 0.054
## koi_slogg_err2 koi_srad koi_srad_err1 koi_srad_err2 ra dec
## 1 -0.096 0.927 0.105 -0.061 291.9342 48.14165
## 2 -0.096 0.927 0.105 -0.061 291.9342 48.14165
## 3 -0.176 0.868 0.233 -0.078 297.0048 48.13413
## 4 -0.168 0.791 0.201 -0.067 285.5346 48.28521
## 5 -0.210 1.046 0.334 -0.133 288.7549 48.22620
## 6 -0.229 0.972 0.315 -0.105 296.2861 48.22467
## koi_kepmag
## 1 15.347
## 2 15.347
## 3 15.436
## 4 15.597
## 5 15.509
## 6 15.714
# Column types of the reduced table.
str(object = data_new)
## 'data.frame': 9564 obs. of 43 variables:
## $ koi_disposition : chr "CONFIRMED" "CONFIRMED" "FALSE POSITIVE" "FALSE POSITIVE" ...
## $ koi_pdisposition : chr "CANDIDATE" "CANDIDATE" "FALSE POSITIVE" "FALSE POSITIVE" ...
## $ koi_score : num 1 0.969 0 0 1 1 1 0.992 0 1 ...
## $ koi_fpflag_nt : int 0 0 0 0 0 0 0 0 0 0 ...
## $ koi_fpflag_ss : int 0 0 1 1 0 0 0 0 1 0 ...
## $ koi_fpflag_co : int 0 0 0 0 0 0 0 0 1 0 ...
## $ koi_fpflag_ec : int 0 0 0 0 0 0 0 0 0 0 ...
## $ koi_period : num 9.49 54.42 19.9 1.74 2.53 ...
## $ koi_period_err1 : num 2.78e-05 2.48e-04 1.49e-05 2.63e-07 3.76e-06 ...
## $ koi_period_err2 : num -2.78e-05 -2.48e-04 -1.49e-05 -2.63e-07 -3.76e-06 ...
## $ koi_time0bk : num 171 163 176 170 172 ...
## $ koi_time0bk_err1 : num 0.00216 0.00352 0.000581 0.000115 0.00113 0.00141 0.0019 0.00461 0.00253 0.000517 ...
## $ koi_time0bk_err2 : num -0.00216 -0.00352 -0.000581 -0.000115 -0.00113 -0.00141 -0.0019 -0.00461 -0.00253 -0.000517 ...
## $ koi_impact : num 0.146 0.586 0.969 1.276 0.701 ...
## $ koi_impact_err1 : num 0.318 0.059 5.126 0.115 0.235 ...
## $ koi_impact_err2 : num -0.146 -0.443 -0.077 -0.092 -0.478 -0.428 -0.532 -0.523 -0.044 -0.052 ...
## $ koi_duration : num 2.96 4.51 1.78 2.41 1.65 ...
## $ koi_duration_err1: num 0.0819 0.116 0.0341 0.00537 0.042 0.061 0.0673 0.165 0.136 0.0241 ...
## $ koi_duration_err2: num -0.0819 -0.116 -0.0341 -0.00537 -0.042 -0.061 -0.0673 -0.165 -0.136 -0.0241 ...
## $ koi_depth : num 616 875 10829 8079 603 ...
## $ koi_depth_err1 : num 19.5 35.5 171 12.8 16.9 24.2 18.7 16.8 5.8 33.3 ...
## $ koi_depth_err2 : num -19.5 -35.5 -171 -12.8 -16.9 -24.2 -18.7 -16.8 -5.8 -33.3 ...
## $ koi_prad : num 2.26 2.83 14.6 33.46 2.75 ...
## $ koi_prad_err1 : num 0.26 0.32 3.92 8.5 0.88 1.27 0.9 0.52 6.45 0.22 ...
## $ koi_prad_err2 : num -0.15 -0.19 -1.31 -2.83 -0.35 -0.42 -0.3 -0.17 -9.67 -0.49 ...
## $ koi_teq : num 793 443 638 1395 1406 ...
## $ koi_insol : num 93.59 9.11 39.3 891.96 926.16 ...
## $ koi_insol_err1 : num 29.45 2.87 31.04 668.95 874.33 ...
## $ koi_insol_err2 : num -16.65 -1.62 -10.49 -230.35 -314.24 ...
## $ koi_model_snr : num 35.8 25.8 76.3 505.6 40.9 ...
## $ koi_tce_plnt_num : int 1 2 1 1 1 1 2 3 1 1 ...
## $ koi_steff : num 5455 5455 5853 5805 6031 ...
## $ koi_steff_err1 : num 81 81 158 157 169 189 189 189 111 75 ...
## $ koi_steff_err2 : num -81 -81 -176 -174 -211 -232 -232 -232 -124 -83 ...
## $ koi_slogg : num 4.47 4.47 4.54 4.56 4.44 ...
## $ koi_slogg_err1 : num 0.064 0.064 0.044 0.053 0.07 0.054 0.054 0.054 0.182 0.083 ...
## $ koi_slogg_err2 : num -0.096 -0.096 -0.176 -0.168 -0.21 -0.229 -0.229 -0.229 -0.098 -0.028 ...
## $ koi_srad : num 0.927 0.927 0.868 0.791 1.046 ...
## $ koi_srad_err1 : num 0.105 0.105 0.233 0.201 0.334 0.315 0.315 0.315 0.322 0.033 ...
## $ koi_srad_err2 : num -0.061 -0.061 -0.078 -0.067 -0.133 -0.105 -0.105 -0.105 -0.483 -0.072 ...
## $ ra : num 292 292 297 286 289 ...
## $ dec : num 48.1 48.1 48.1 48.3 48.2 ...
## $ koi_kepmag : num 15.3 15.3 15.4 15.6 15.5 ...
# Handle missing values with kNN imputation
#install.packages("VIM")
library(VIM)
## Warning: package 'VIM' was built under R version 4.4.3
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Impute the remaining missing values with VIM's k-nearest-neighbour method
# (k = 5). imp_var = FALSE is passed so that only the original columns are
# returned (the printed result below has the same columns as data_new).
imputed_data <- kNN(data = data_new, k = 5, imp_var = FALSE)
head(x = imputed_data)
## koi_disposition koi_pdisposition koi_score koi_fpflag_nt koi_fpflag_ss
## 1 CONFIRMED CANDIDATE 1.000 0 0
## 2 CONFIRMED CANDIDATE 0.969 0 0
## 3 FALSE POSITIVE FALSE POSITIVE 0.000 0 1
## 4 FALSE POSITIVE FALSE POSITIVE 0.000 0 1
## 5 CONFIRMED CANDIDATE 1.000 0 0
## 6 CONFIRMED CANDIDATE 1.000 0 0
## koi_fpflag_co koi_fpflag_ec koi_period koi_period_err1 koi_period_err2
## 1 0 0 9.488036 2.775e-05 -2.775e-05
## 2 0 0 54.418383 2.479e-04 -2.479e-04
## 3 0 0 19.899140 1.494e-05 -1.494e-05
## 4 0 0 1.736952 2.630e-07 -2.630e-07
## 5 0 0 2.525592 3.761e-06 -3.761e-06
## 6 0 0 11.094321 2.036e-05 -2.036e-05
## koi_time0bk koi_time0bk_err1 koi_time0bk_err2 koi_impact koi_impact_err1
## 1 170.5387 0.002160 -0.002160 0.146 0.318
## 2 162.5138 0.003520 -0.003520 0.586 0.059
## 3 175.8503 0.000581 -0.000581 0.969 5.126
## 4 170.3076 0.000115 -0.000115 1.276 0.115
## 5 171.5956 0.001130 -0.001130 0.701 0.235
## 6 171.2012 0.001410 -0.001410 0.538 0.030
## koi_impact_err2 koi_duration koi_duration_err1 koi_duration_err2 koi_depth
## 1 -0.146 2.95750 0.08190 -0.08190 615.8
## 2 -0.443 4.50700 0.11600 -0.11600 874.8
## 3 -0.077 1.78220 0.03410 -0.03410 10829.0
## 4 -0.092 2.40641 0.00537 -0.00537 8079.2
## 5 -0.478 1.65450 0.04200 -0.04200 603.3
## 6 -0.428 4.59450 0.06100 -0.06100 1517.5
## koi_depth_err1 koi_depth_err2 koi_prad koi_prad_err1 koi_prad_err2 koi_teq
## 1 19.5 -19.5 2.26 0.26 -0.15 793
## 2 35.5 -35.5 2.83 0.32 -0.19 443
## 3 171.0 -171.0 14.60 3.92 -1.31 638
## 4 12.8 -12.8 33.46 8.50 -2.83 1395
## 5 16.9 -16.9 2.75 0.88 -0.35 1406
## 6 24.2 -24.2 3.90 1.27 -0.42 835
## koi_insol koi_insol_err1 koi_insol_err2 koi_model_snr koi_tce_plnt_num
## 1 93.59 29.45 -16.65 35.8 1
## 2 9.11 2.87 -1.62 25.8 2
## 3 39.30 31.04 -10.49 76.3 1
## 4 891.96 668.95 -230.35 505.6 1
## 5 926.16 874.33 -314.24 40.9 1
## 6 114.81 112.85 -36.70 66.5 1
## koi_steff koi_steff_err1 koi_steff_err2 koi_slogg koi_slogg_err1
## 1 5455 81 -81 4.467 0.064
## 2 5455 81 -81 4.467 0.064
## 3 5853 158 -176 4.544 0.044
## 4 5805 157 -174 4.564 0.053
## 5 6031 169 -211 4.438 0.070
## 6 6046 189 -232 4.486 0.054
## koi_slogg_err2 koi_srad koi_srad_err1 koi_srad_err2 ra dec
## 1 -0.096 0.927 0.105 -0.061 291.9342 48.14165
## 2 -0.096 0.927 0.105 -0.061 291.9342 48.14165
## 3 -0.176 0.868 0.233 -0.078 297.0048 48.13413
## 4 -0.168 0.791 0.201 -0.067 285.5346 48.28521
## 5 -0.210 1.046 0.334 -0.133 288.7549 48.22620
## 6 -0.229 0.972 0.315 -0.105 296.2861 48.22467
## koi_kepmag
## 1 15.347
## 2 15.347
## 3 15.436
## 4 15.597
## 5 15.509
## 6 15.714
# Re-count missing values after imputation; every column should now show 0.
missing_after_impute <- colSums(is.na(imputed_data))
na_counts <- sort(missing_after_impute, decreasing = TRUE)
na_df <- data.frame(Missing_Values = na_counts)
print(na_df)
## Missing_Values
## koi_disposition 0
## koi_pdisposition 0
## koi_score 0
## koi_fpflag_nt 0
## koi_fpflag_ss 0
## koi_fpflag_co 0
## koi_fpflag_ec 0
## koi_period 0
## koi_period_err1 0
## koi_period_err2 0
## koi_time0bk 0
## koi_time0bk_err1 0
## koi_time0bk_err2 0
## koi_impact 0
## koi_impact_err1 0
## koi_impact_err2 0
## koi_duration 0
## koi_duration_err1 0
## koi_duration_err2 0
## koi_depth 0
## koi_depth_err1 0
## koi_depth_err2 0
## koi_prad 0
## koi_prad_err1 0
## koi_prad_err2 0
## koi_teq 0
## koi_insol 0
## koi_insol_err1 0
## koi_insol_err2 0
## koi_model_snr 0
## koi_tce_plnt_num 0
## koi_steff 0
## koi_steff_err1 0
## koi_steff_err2 0
## koi_slogg 0
## koi_slogg_err1 0
## koi_slogg_err2 0
## koi_srad 0
## koi_srad_err1 0
## koi_srad_err2 0
## ra 0
## dec 0
## koi_kepmag 0
# Standardisation: z-score the continuous numeric columns; binary (two-valued)
# numeric columns and non-numeric columns are carried over unchanged.
is_num_col <- vapply(imputed_data, is.numeric, logical(1))
numeric_data <- imputed_data[, is_num_col]
categorical_data <- imputed_data[, !is_num_col]
is_binary <- vapply(
  numeric_data,
  function(col) length(unique(col)) == 2,
  logical(1)
)
binary_vars <- names(numeric_data)[is_binary]
numeric_vars <- setdiff(names(numeric_data), binary_vars)
scaled_numeric <- scale(numeric_data[, numeric_vars])
# Reassemble in the order: scaled columns, binary columns, categorical columns.
final_data <- cbind(
  as.data.frame(scaled_numeric),
  numeric_data[, binary_vars, drop = FALSE],
  categorical_data
)
# Label encoding: FALSE POSITIVE -> 0, CANDIDATE -> 1, CONFIRMED -> 2.
# Values outside these three labels become NA.
disposition_levels <- c("FALSE POSITIVE", "CANDIDATE", "CONFIRMED")
final_data$koi_disposition_enc <-
  match(final_data$koi_disposition, disposition_levels) - 1
final_data$koi_pdisposition_enc <-
  match(final_data$koi_pdisposition, disposition_levels) - 1
# Cross-check the original labels against their codes.
table(final_data[["koi_disposition"]], final_data[["koi_disposition_enc"]])
##
## 0 1 2
## CANDIDATE 0 2248 0
## CONFIRMED 0 0 2293
## FALSE POSITIVE 5023 0 0
# Cross-check the pre-disposition labels against their codes.
table(final_data[["koi_pdisposition"]], final_data[["koi_pdisposition_enc"]])
##
## 0 1
## CANDIDATE 0 4496
## FALSE POSITIVE 5068 0
# Show raw and encoded labels side by side for the first rows.
label_cols <- c(
  "koi_disposition", "koi_disposition_enc",
  "koi_pdisposition", "koi_pdisposition_enc"
)
head(final_data[, label_cols])
## koi_disposition koi_disposition_enc koi_pdisposition koi_pdisposition_enc
## 1 CONFIRMED 2 CANDIDATE 1
## 2 CONFIRMED 2 CANDIDATE 1
## 3 FALSE POSITIVE 0 FALSE POSITIVE 0
## 4 FALSE POSITIVE 0 FALSE POSITIVE 0
## 5 CONFIRMED 2 CANDIDATE 1
## 6 CONFIRMED 2 CANDIDATE 1
# Keep only the encoded response: drop both raw label columns and the encoded
# pre-disposition. dplyr::select is namespaced because MASS (attached below)
# masks select().
final_data <- dplyr::select(
  final_data,
  -c(koi_disposition, koi_pdisposition, koi_pdisposition_enc)
)
#install.packages('MASS')
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Multivariate outlier screening: squared Mahalanobis distance of every row of
# the standardised matrix from the column means, compared with the 99%
# chi-square quantile whose degrees of freedom equal the number of columns.
centre <- colMeans(scaled_numeric)
covariance <- cov(scaled_numeric)
outlier <- mahalanobis(x = scaled_numeric, center = centre, cov = covariance)
threshold <- qchisq(p = 0.99, df = ncol(scaled_numeric))
is_flagged <- outlier > threshold
sum(is_flagged)
## [1] 600
# Positions (in final_data / scaled_numeric) of the flagged rows.
outlier_index <- which(is_flagged)
head(final_data[outlier_index, ])
## koi_score koi_period koi_period_err1 koi_period_err2 koi_time0bk
## 12 1.1724578 -0.05504173 -0.2573629 0.2575222 -0.65997344
## 30 -0.9577444 0.44575900 -0.1038511 0.1040320 0.63105059
## 202 1.1724578 -0.05353116 -0.2570476 0.2572069 0.06535999
## 230 1.1724578 -0.04715416 -0.2568226 0.2569819 0.15836231
## 250 1.1724578 -0.04856580 -0.2553441 0.2555037 0.08278379
## 251 1.1724578 -0.05169757 -0.2530879 0.2532478 -0.42320099
## koi_time0bk_err1 koi_time0bk_err2 koi_impact koi_impact_err1
## 12 -0.4328947 0.4331666 -0.152137993 -0.1865190
## 30 -0.3642798 0.3645617 -0.006944545 -0.2033544
## 202 -0.4119216 0.4121967 -0.005726990 -0.1971633
## 230 -0.4215472 0.4218208 0.026538220 -0.2016165
## 250 -0.3797336 0.3800133 -0.156399435 -0.1802193
## 251 -0.2547786 0.2550765 -0.024294706 -0.1705525
## koi_impact_err2 koi_duration koi_duration_err1 koi_duration_err2
## 12 0.08853701 -0.2677821 -0.5099980 0.5101836
## 30 0.26137959 7.3846862 -0.3149687 0.3151865
## 202 0.01071690 -0.3919007 -0.4155304 0.4157316
## 230 0.23844314 -0.5274323 -0.4477404 0.4479363
## 250 0.09345197 -0.4052051 -0.4478916 0.4480875
## 251 -0.09905043 -0.5727537 -0.3134565 0.3136745
## koi_depth koi_depth_err1 koi_depth_err2 koi_prad koi_prad_err1
## 12 -0.20579166 -0.03056899 0.03058046 -0.02756301 -0.04216644
## 30 -0.06525387 -0.02264661 0.02265810 -0.02821230 -0.03872587
## 202 -0.26013783 -0.02653306 0.02654454 -0.03202848 -0.04276593
## 230 -0.20836451 -0.02077813 0.02078962 -0.03128314 -0.04174940
## 250 -0.26733195 -0.02381753 0.02382901 -0.03212124 -0.04313084
## 251 -0.28349904 -0.02349366 0.02350514 -0.03250551 -0.04373033
## koi_prad_err2 koi_teq koi_insol koi_insol_err1 koi_insol_err2
## 12 0.02636101 1.1232139 -0.02219936 -0.05901247 0.03809786
## 30 0.02653185 -1.0556407 -0.04870545 -0.07104560 0.04562884
## 202 0.02664290 -0.5862222 -0.04851805 -0.07028697 0.04544482
## 230 0.02631829 -0.7779896 -0.04865517 -0.07083022 0.04557924
## 250 0.02677103 -0.6991650 -0.04861402 -0.07062925 0.04553470
## 251 0.02695041 -0.5956341 -0.04852840 -0.07024578 0.04544816
## koi_model_snr koi_tce_plnt_num koi_steff koi_steff_err1 koi_steff_err2
## 12 7.28284217 -0.3612793 0.9343576 -1.4923649 1.0279538
## 30 0.49975558 -0.3612793 -0.2425452 -0.9743788 0.7192296
## 202 -0.12045839 -0.3612793 -2.3955097 7.2486508 -1.3576423
## 230 -0.05305217 -0.3612793 -2.2509553 8.9536885 -1.9470248
## 250 -0.23979425 -0.3612793 -1.9298656 11.4572881 -3.5608104
## 251 -0.30413074 1.1622107 -1.9298656 11.4572881 -3.5608104
## koi_slogg koi_slogg_err1 koi_slogg_err2 koi_srad koi_srad_err1
## 12 -0.6964601 -0.6575892 1.3904090 0.04174523 -0.2849172
## 30 0.3552572 -0.6652649 0.2949655 -0.11944897 -0.2309724
## 202 1.0164039 -0.1049396 0.6012185 -0.19696857 -0.2684035
## 230 0.9552302 -0.1126153 0.7072292 -0.19297615 -0.2761099
## 250 0.8634696 -0.2354263 0.8603557 -0.18465859 -0.2739081
## 251 0.8634696 -0.2354263 0.8603557 -0.18465859 -0.2739081
## koi_srad_err2 ra dec koi_kepmag koi_fpflag_nt
## 12 0.1281354 0.03925539 1.15490354 -2.7441164 0
## 30 0.1569711 0.97310271 1.52892943 0.6802056 1
## 202 0.1262445 0.97212299 -0.32886512 1.0302891 0
## 230 0.1295535 -1.44602029 0.01830088 1.0721548 0
## 250 0.1286081 0.73829245 0.20424225 0.5726542 0
## 251 0.1286081 0.73829245 0.20424225 0.5726542 0
## koi_fpflag_ss koi_fpflag_co koi_fpflag_ec koi_disposition_enc
## 12 1 0 0 2
## 30 0 0 0 0
## 202 0 0 0 1
## 230 0 0 0 1
## 250 0 0 0 2
## 251 0 0 0 2
# Remove the rows flagged as multivariate outliers.
# Rows are selected positively: final_data[-outlier_index, ] would return an
# empty data frame when outlier_index is integer(0), because -integer(0)
# selects nothing.
keep_rows <- setdiff(seq_len(nrow(final_data)), outlier_index)
final_selected <- final_data[keep_rows, ]
### 4. FEATURE SELECTION - STEPWISE AIC ###
# Fit a linear model of the encoded disposition on all remaining columns, then
# let MASS::stepAIC add/remove terms in both directions.
model_full <- lm(koi_disposition_enc ~ ., data = final_selected)
model_step <- stepAIC(object = model_full, direction = "both", trace = FALSE)
# The first coefficient is dropped (the intercept of the fitted model); the
# remaining coefficient names are the selected features.
coef_names <- names(coef(model_step))
selected_vars <- coef_names[-1]
print("Fitur terpilih:")
## [1] "Fitur terpilih:"
print(selected_vars)
## [1] "koi_score" "koi_period" "koi_period_err1"
## [4] "koi_time0bk" "koi_time0bk_err1" "koi_time0bk_err2"
## [7] "koi_impact" "koi_impact_err1" "koi_impact_err2"
## [10] "koi_duration" "koi_duration_err2" "koi_depth"
## [13] "koi_prad" "koi_prad_err1" "koi_teq"
## [16] "koi_insol_err1" "koi_tce_plnt_num" "koi_steff"
## [19] "koi_steff_err1" "koi_slogg" "koi_slogg_err1"
## [22] "koi_slogg_err2" "koi_srad" "koi_srad_err1"
## [25] "koi_srad_err2" "ra" "dec"
## [28] "koi_fpflag_nt" "koi_fpflag_ss" "koi_fpflag_co"
# Keep the response plus the features chosen by the stepwise search.
final_selected_data <- final_selected[, c("koi_disposition_enc", selected_vars)]
library(tidyr)
library(ggplot2)
library(dplyr)
# Row is the position within the outlier-free data (1..nrow).
final_selected_data <- final_selected_data %>%
  mutate(Row = row_number())
# Long format for the outlier boxplot. It is built from the full final_data
# (before outlier removal) so that Row is the same position that outlier_index
# refers to. Numbering rows after the outliers were removed and then matching
# against outlier_index labelled unrelated rows as "Outlier".
long_data <- final_data[, c("koi_disposition_enc", selected_vars)] %>%
  mutate(Row = row_number()) %>%
  pivot_longer(
    cols = all_of(selected_vars),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  mutate(Outlier = ifelse(Row %in% outlier_index, "Outlier", "Normal"))
# Build the boxplot: one box per variable and outlier status, with the
# individual observations jittered on top.
outlier_boxplot <- ggplot(
  data = long_data,
  mapping = aes(x = Variable, y = Value, fill = Outlier)
) +
  geom_boxplot(outlier.shape = NA, position = position_dodge(width = 0.75)) +
  geom_jitter(aes(color = Outlier), width = 0.2, alpha = 0.4) +
  scale_color_manual(values = c("Normal" = "black", "Outlier" = "red")) +
  scale_fill_manual(values = c("Normal" = "grey80", "Outlier" = "red")) +
  labs(
    title = "Boxplot per Variabel dengan Highlight Outlier (Mahalanobis)",
    y = "Value", x = "Variabel"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
outlier_boxplot

# Boxplot visualisation.
# NOTE(review): the script draws the identical plot a second time; consider
# removing this second render.
outlier_boxplot

# PCA before outlier removal: project every row of the standardised matrix on
# the first two components and attach the Mahalanobis outlier flag.
is_outlier <- outlier > threshold
pca_before <- prcomp(scaled_numeric)
pca_before_df <- as.data.frame(pca_before$x[, 1:2])
pca_before_df$outlier <- is_outlier
# PCA after outlier removal: the same standardised columns with the flagged
# rows excluded, so both plots are comparable. Taking every numeric column of
# final_selected_data instead pulled in the unscaled Row counter and the
# encoded response, and the row counter then dominated the components.
scaled_numeric_clean <- as.data.frame(scaled_numeric[!is_outlier, , drop = FALSE])
pca_after <- prcomp(scaled_numeric_clean)
pca_after_df <- as.data.frame(pca_after$x[, 1:2])
# Visualisasi
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Left panel: all rows, coloured by outlier flag (FALSE = black, TRUE = red).
p1 <- ggplot(
  data = pca_before_df,
  mapping = aes(x = PC1, y = PC2, color = outlier)
) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  scale_color_manual(values = c("black", "red")) +
  labs(title = "Sebelum Outlier Dihapus", color = "Outlier")
# Right panel: rows remaining after outlier removal.
p2 <- ggplot(data = pca_after_df, mapping = aes(x = PC1, y = PC2)) +
  geom_point(alpha = 0.6, color = "blue") +
  theme_minimal() +
  labs(title = "Setelah Outlier Dihapus")
# Show both panels side by side.
gridExtra::grid.arrange(grobs = list(p1, p2), nrow = 1)

# Column count and column names of the selected-feature table.
NCOL(final_selected_data)
## [1] 32
colnames(final_selected_data)
## [1] "koi_disposition_enc" "koi_score" "koi_period"
## [4] "koi_period_err1" "koi_time0bk" "koi_time0bk_err1"
## [7] "koi_time0bk_err2" "koi_impact" "koi_impact_err1"
## [10] "koi_impact_err2" "koi_duration" "koi_duration_err2"
## [13] "koi_depth" "koi_prad" "koi_prad_err1"
## [16] "koi_teq" "koi_insol_err1" "koi_tce_plnt_num"
## [19] "koi_steff" "koi_steff_err1" "koi_slogg"
## [22] "koi_slogg_err1" "koi_slogg_err2" "koi_srad"
## [25] "koi_srad_err1" "koi_srad_err2" "ra"
## [28] "dec" "koi_fpflag_nt" "koi_fpflag_ss"
## [31] "koi_fpflag_co" "Row"
# Copy the encoded response into a column named kelas (used as the response
# in the models below) and check its length.
final_selected_data[["kelas"]] <- final_selected_data[["koi_disposition_enc"]]
length(final_selected_data[["kelas"]]) # expected: 8964
## [1] 8964
# IQR filter: remove every feature whose inter-quartile range is zero.
# The mask is computed over all columns except `kelas`, selected by name.
# The previous positional `1:32` produced a 32-element mask that was then
# applied to a 33-column data frame, so `kelas` was kept only because the
# logical index was silently recycled.
feature_cols <- setdiff(names(final_selected_data), "kelas")
iqr_zero <- vapply(final_selected_data[feature_cols],
                   function(x) IQR(x) == 0,
                   logical(1))
which(iqr_zero)
## koi_tce_plnt_num koi_fpflag_nt koi_fpflag_ss koi_fpflag_co
## 18 29 30 31
# Drop the flagged columns by name; everything else (including `kelas`) stays.
zero_iqr_cols <- names(iqr_zero)[iqr_zero]
datacek <- final_selected_data[, !(names(final_selected_data) %in% zero_iqr_cols),
                               drop = FALSE]
ncol(datacek)
## [1] 29
names(datacek)
## [1] "koi_disposition_enc" "koi_score" "koi_period"
## [4] "koi_period_err1" "koi_time0bk" "koi_time0bk_err1"
## [7] "koi_time0bk_err2" "koi_impact" "koi_impact_err1"
## [10] "koi_impact_err2" "koi_duration" "koi_duration_err2"
## [13] "koi_depth" "koi_prad" "koi_prad_err1"
## [16] "koi_teq" "koi_insol_err1" "koi_steff"
## [19] "koi_steff_err1" "koi_slogg" "koi_slogg_err1"
## [22] "koi_slogg_err2" "koi_srad" "koi_srad_err1"
## [25] "koi_srad_err2" "ra" "dec"
## [28] "Row" "kelas"
#install.packages("caret")
#install.packages("future")
#installed.packages()["caret", ]
#FEATURE SELECTION dengan Korelasi
library(caret, verbose = TRUE)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
# Correlation-based feature filter: flag columns whose pairwise absolute
# correlation exceeds 0.9.
cor_matrix <- cor(datacek)
high_corr <- findCorrelation(cor_matrix, cutoff = 0.9)
# Translate the flagged positions into names before dropping anything:
#  * `datacek[, -high_corr]` would return ZERO columns if nothing were flagged
#    (negative indexing with an empty vector selects nothing).
#  * `kelas` is a copy of `koi_disposition_enc`, so the two are perfectly
#    correlated and findCorrelation() flags one of them. `kelas` is the
#    response used by the later models and must always survive; the duplicate
#    must always go, otherwise it would stay in the data as a predictor.
drop_cols <- colnames(cor_matrix)[high_corr]
drop_cols <- union(setdiff(drop_cols, "kelas"), "koi_disposition_enc")
data_final_for_mvn <- datacek[, !(names(datacek) %in% drop_cols), drop = FALSE]
ncol(data_final_for_mvn)
## [1] 26
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Iterative VIF pruning: refit the linear model, look up the largest variance
# inflation factor, and drop that feature until every VIF is at most 5.
current_data <- data_final_for_mvn
while (TRUE) {
  model <- lm(kelas ~ ., data = current_data)
  vif_values <- vif(model)
  worst <- which.max(vif_values)
  max_vif <- vif_values[[worst]]
  if (max_vif <= 5) {
    break
  }
  feature_to_remove <- names(worst)
  cat("Menghapus:", feature_to_remove, "dengan VIF =", max_vif, "\n")
  keep_cols <- setdiff(names(current_data), feature_to_remove)
  current_data <- current_data[, keep_cols]
}
## Menghapus: koi_srad dengan VIF = 22.78738
## Menghapus: koi_slogg dengan VIF = 5.583075
#install.packages("rrcov")
#install.packages("biotools")
library(rrcov)
## Warning: package 'rrcov' was built under R version 4.4.3
## Loading required package: robustbase
## Warning: package 'robustbase' was built under R version 4.4.3
## Scalable Robust Estimators with High Breakdown Point (version 1.7-7)
library(biotools)
## Warning: package 'biotools' was built under R version 4.4.3
## ---
## biotools version 4.3
# Sanity checks on the correlation-filtered data and on the VIF-pruned data.
length(names(data_final_for_mvn))
## [1] 26
NCOL(data_final_for_mvn)
## [1] 26
length(data_final_for_mvn[["kelas"]]) # number of rows, taken from `kelas`
## [1] 8964
is.element("kelas", colnames(data_final_for_mvn)) # is the `kelas` column present?
## [1] TRUE
str(data_final_for_mvn[["kelas"]]) # structure of the `kelas` column
## num [1:8964] 2 2 0 0 2 2 2 2 0 2 ...
is.element("kelas", colnames(current_data)) # must be TRUE
## [1] TRUE
str(current_data)
## 'data.frame': 8964 obs. of 24 variables:
## $ koi_score : num 1.172 1.106 -0.958 -0.958 1.172 ...
## $ koi_period : num -0.0496 -0.0159 -0.0418 -0.0554 -0.0548 ...
## $ koi_period_err1 : num -0.254 -0.227 -0.256 -0.257 -0.257 ...
## $ koi_time0bk : num 0.0641 -0.054 0.1423 0.0607 0.0797 ...
## $ koi_time0bk_err2 : num 0.339 0.278 0.408 0.429 0.384 ...
## $ koi_impact : num -0.17588 -0.04195 0.07463 0.16808 -0.00694 ...
## $ koi_impact_err1 : num -0.169 -0.197 0.353 -0.191 -0.178 ...
## $ koi_impact_err2 : num 0.1459 -0.0974 0.2024 0.1901 -0.1261 ...
## $ koi_duration : num -0.412 -0.172 -0.593 -0.497 -0.613 ...
## $ koi_duration_err2: num 0.389 0.338 0.462 0.505 0.45 ...
## $ koi_depth : num -0.28 -0.277 -0.155 -0.189 -0.281 ...
## $ koi_prad_err1 : num -0.0436 -0.0434 -0.0341 -0.0221 -0.042 ...
## $ koi_teq : num -0.353 -0.765 -0.536 0.355 0.368 ...
## $ koi_insol_err1 : num -0.0705 -0.071 -0.0705 -0.0587 -0.0549 ...
## $ koi_steff : num -0.326 -0.326 0.183 0.122 0.411 ...
## $ koi_steff_err1 : num -1.384 -1.384 0.277 0.256 0.515 ...
## $ koi_slogg_err1 : num -0.42 -0.42 -0.573 -0.504 -0.374 ...
## $ koi_slogg_err2 : num 0.578 0.578 -0.365 -0.27 -0.765 ...
## $ koi_srad_err1 : num -0.2783 -0.2783 -0.1374 -0.1726 -0.0262 ...
## $ koi_srad_err2 : num 0.151 0.151 0.143 0.148 0.117 ...
## $ ra : num -0.0264 -0.0264 1.0373 -1.369 -0.6934 ...
## $ dec : num 1.2 1.2 1.2 1.24 1.23 ...
## $ Row : int 1 2 3 4 5 6 7 8 9 10 ...
## $ kelas : num 2 2 0 0 2 2 2 2 0 2 ...
NCOL(current_data)
## [1] 24
# Normality tests on the data BEFORE any normalising transformation.
library(MVN)
## Warning: package 'MVN' was built under R version 4.4.3
# Every column except the class label goes into the Henze-Zirkler test.
mvn_result <- mvn(data = current_data[, names(current_data) != "kelas"],
                  mvnTest = "hz")
print(mvn_result[["multivariateNormality"]])
## Test HZ p value MVN
## 1 Henze-Zirkler 10.46139 0 NO
print(mvn_result[["univariateNormality"]])
## Test Variable Statistic p value Normality
## 1 Anderson-Darling koi_score 1221.0027 <0.001 NO
## 2 Anderson-Darling koi_period 1674.9582 <0.001 NO
## 3 Anderson-Darling koi_period_err1 2419.1548 <0.001 NO
## 4 Anderson-Darling koi_time0bk 1398.9249 <0.001 NO
## 5 Anderson-Darling koi_time0bk_err2 895.9801 <0.001 NO
## 6 Anderson-Darling koi_impact 203.4474 <0.001 NO
## 7 Anderson-Darling koi_impact_err1 2802.2317 <0.001 NO
## 8 Anderson-Darling koi_impact_err2 240.7686 <0.001 NO
## 9 Anderson-Darling koi_duration 696.0201 <0.001 NO
## 10 Anderson-Darling koi_duration_err2 1000.8796 <0.001 NO
## 11 Anderson-Darling koi_depth 2495.1307 <0.001 NO
## 12 Anderson-Darling koi_prad_err1 2037.9700 <0.001 NO
## 13 Anderson-Darling koi_teq 235.5159 <0.001 NO
## 14 Anderson-Darling koi_insol_err1 2215.4798 <0.001 NO
## 15 Anderson-Darling koi_steff 131.2111 <0.001 NO
## 16 Anderson-Darling koi_steff_err1 244.3561 <0.001 NO
## 17 Anderson-Darling koi_slogg_err1 769.9105 <0.001 NO
## 18 Anderson-Darling koi_slogg_err2 59.1611 <0.001 NO
## 19 Anderson-Darling koi_srad_err1 592.1514 <0.001 NO
## 20 Anderson-Darling koi_srad_err2 1527.2079 <0.001 NO
## 21 Anderson-Darling ra 43.3719 <0.001 NO
## 22 Anderson-Darling dec 58.5571 <0.001 NO
## 23 Anderson-Darling Row 99.6392 <0.001 NO
#install.packages("bestNormalize")
library(bestNormalize)
## Warning: package 'bestNormalize' was built under R version 4.4.3
##
## Attaching package: 'bestNormalize'
## The following object is masked from 'package:MASS':
##
## boxcox
# Separate the class column so it is not transformed.
kelas_col <- current_data$kelas
data_to_transform <- current_data[, names(current_data) != "kelas"]
# bestNormalize() chooses among candidate transformations using repeated
# cross-validation on random folds, so the selected transformation can differ
# between runs. Fix the seed to make this step reproducible.
# NOTE(review): the transformations are estimated on all rows, before the
# train/test split further down -- confirm this is acceptable for the
# hold-out evaluation.
set.seed(123)
data_transformed_part <- as.data.frame(
  lapply(data_to_transform, function(x) bestNormalize(x)$x.t)
)
## Warning: `progress_estimated()` was deprecated in dplyr 1.0.0.
## ℹ The deprecated feature was likely used in the bestNormalize package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Re-attach the untouched class column as the last column.
data_transformed <- cbind(data_transformed_part, kelas = kelas_col)
ncol(data_transformed)
## [1] 24
# Multivariate normality (Henze-Zirkler) on the normalised features.
library(MVN)
# Every column except the class label is tested.
mvn_result <- mvn(data = data_transformed[, names(data_transformed) != "kelas"],
                  mvnTest = "hz")
print(mvn_result[["multivariateNormality"]])
## Test HZ p value MVN
## 1 Henze-Zirkler 1.838561 0 NO
print(mvn_result[["univariateNormality"]])
## Test Variable Statistic p value Normality
## 1 Anderson-Darling koi_score 553.7566 <0.001 NO
## 2 Anderson-Darling koi_period 0.0002 1 YES
## 3 Anderson-Darling koi_period_err1 0.0091 1 YES
## 4 Anderson-Darling koi_time0bk 0.0002 1 YES
## 5 Anderson-Darling koi_time0bk_err2 0.0129 1 YES
## 6 Anderson-Darling koi_impact 0.0651 0.9996 YES
## 7 Anderson-Darling koi_impact_err1 0.2552 0.7273 YES
## 8 Anderson-Darling koi_impact_err2 0.2658 0.6918 YES
## 9 Anderson-Darling koi_duration 0.0003 1 YES
## 10 Anderson-Darling koi_duration_err2 0.0743 0.9992 YES
## 11 Anderson-Darling koi_depth 0.0059 1 YES
## 12 Anderson-Darling koi_prad_err1 5.5044 <0.001 NO
## 13 Anderson-Darling koi_teq 0.0111 1 YES
## 14 Anderson-Darling koi_insol_err1 0.2591 0.7145 YES
## 15 Anderson-Darling koi_steff 0.3902 0.3821 YES
## 16 Anderson-Darling koi_steff_err1 0.6482 0.0908 YES
## 17 Anderson-Darling koi_slogg_err1 0.7421 0.0532 YES
## 18 Anderson-Darling koi_slogg_err2 0.5005 0.208 YES
## 19 Anderson-Darling koi_srad_err1 0.0541 0.9998 YES
## 20 Anderson-Darling koi_srad_err2 0.1462 0.9676 YES
## 21 Anderson-Darling ra 0.0005 1 YES
## 22 Anderson-Darling dec 0.0004 1 YES
## 23 Anderson-Darling Row 0.0002 1 YES
# HOMOGENEITY TEST (Box's M: equality of the group covariance matrices)
library(biotools)
# Normalised features; `kelas` is used only as the grouping variable.
boxM(data_transformed[, -which(names(data_transformed) == "kelas")],
     grouping = data_transformed$kelas)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: data_transformed[, -which(names(data_transformed) == "kelas")]
## Chi-Sq (approx.) = 40304, df = 552, p-value < 2.2e-16
# Untransformed features. The columns are selected by name instead of the
# positional `1:22`: the width of `current_data` depends on how many features
# the VIF loop removed, so a fixed range is only correct for one particular
# run. The selection reproduces what `1:22` picked (everything except `Row`
# and `kelas`).
# NOTE(review): the test on the normalised data above includes `Row`, this
# one does not -- confirm which of the two is intended.
boxm_cols <- setdiff(names(current_data), c("Row", "kelas"))
hasil_boxM <- boxM(current_data[, boxm_cols], grouping = current_data$kelas)
print(hasil_boxM)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: current_data[, boxm_cols]
## Chi-Sq (approx.) = 99113, df = 506, p-value < 2.2e-16
hasil_boxM$p.value
## [1] 0
########################### DISCRIMINANT ANALYSIS #########################
# Reproducible 75 % / 25 % split of the normalised data into training and
# test rows.
set.seed(123)
n_total <- nrow(data_transformed)
n_train <- floor(0.75 * n_total)
train_index <- sample(n_total, size = n_train, replace = FALSE)
training_data <- data_transformed[train_index, ]
test_data <- data_transformed[-train_index, ]
# Fit the linear discriminant function on the training rows, using every other
# column as a predictor of `kelas`, and print the fitted model.
# NOTE(review): the row index `Row` is among the predictors -- confirm that
# this is intended.
linearDA <- lda(formula = kelas ~ ., data = training_data)
linearDA
## Call:
## lda(kelas ~ ., data = training_data)
##
## Prior probabilities of groups:
## 0 1 2
## 0.5061728 0.2411126 0.2527146
##
## Group means:
## koi_score koi_period koi_period_err1 koi_time0bk koi_time0bk_err2
## 0 -0.6839720 -0.15086598 -0.195069269 -0.1400124 0.07781727
## 1 0.5762906 0.24135010 0.408379972 0.1280168 -0.43851852
## 2 0.9312293 0.07605598 0.009295873 0.1392560 0.23311219
## koi_impact koi_impact_err1 koi_impact_err2 koi_duration koi_duration_err2
## 0 0.2740853 -0.01269439 0.2015503 0.11276005 0.04458291
## 1 -0.2207740 0.04194091 -0.2171955 -0.13410525 -0.41033633
## 2 -0.3432967 -0.04676345 -0.2192614 -0.07238414 0.28586718
## koi_depth koi_prad_err1 koi_teq koi_insol_err1 koi_steff
## 0 0.164591477 0.2882493 0.2308983 0.2641781 0.19841377
## 1 -0.366797448 -0.2386574 -0.2485578 -0.2202241 -0.06448707
## 2 -0.009891373 -0.3578376 -0.2044547 -0.3040914 -0.31627542
## koi_steff_err1 koi_slogg_err1 koi_slogg_err2 koi_srad_err1 koi_srad_err2
## 0 0.3677895 0.12609774 -0.25145637 0.29465025 -0.18160134
## 1 -0.1464673 -0.02689074 0.04295088 -0.02652089 0.01545131
## 2 -0.6061518 -0.22920823 0.49715852 -0.56120799 0.34682786
## ra dec Row
## 0 0.12853300 -0.09118499 0.3624977
## 1 -0.04068517 -0.00388492 0.1154796
## 2 -0.24374048 0.16420387 -0.8167461
##
## Coefficients of linear discriminants:
## LD1 LD2
## koi_score 2.47576492 -0.69217873
## koi_period 0.11483091 -0.44913051
## koi_period_err1 0.14808596 0.75787795
## koi_time0bk -0.10317538 0.01315609
## koi_time0bk_err2 0.06114391 0.76896924
## koi_impact -0.17997726 -0.46808872
## koi_impact_err1 0.04412994 0.23074621
## koi_impact_err2 -0.18151638 -0.56664690
## koi_duration 0.20400784 0.64587005
## koi_duration_err2 0.26032490 0.48654323
## koi_depth -0.13983395 0.35350133
## koi_prad_err1 0.01383702 -0.21484327
## koi_teq 0.06195695 0.26077490
## koi_insol_err1 0.03944878 0.13631152
## koi_steff 0.06031380 0.02007047
## koi_steff_err1 -0.12580034 -0.15429933
## koi_slogg_err1 0.16123525 0.14942134
## koi_slogg_err2 0.03902269 0.03015204
## koi_srad_err1 -0.21517507 0.10358647
## koi_srad_err2 0.09891657 0.34129737
## ra -0.02043286 -0.12930683
## dec 0.07391155 0.14058807
## Row -0.16808103 -0.57376638
##
## Proportion of trace:
## LD1 LD2
## 0.952 0.048
# Base-graphics plot of the discriminant scores, coloured by class.
# The class codes are 0/1/2 and colour index 0 is the background colour, so
# `col = as.integer(kelas)` drew class 0 invisibly. Mapping the classes through
# factor() yields the colour indices 1, 2, 3 instead.
plot(linearDA, col = as.integer(factor(training_data$kelas)))

#ggplot
library(MASS)
library(ggplot2)
# Fit the LDA on the training rows and project the test rows onto the
# discriminant axes.
lda_model <- lda(kelas ~ ., data = training_data)
lda_pred <- predict(lda_model, test_data)
# Discriminant scores of the test rows together with their true class.
plot_df <- data.frame(lda_pred$x, kelas = as.factor(test_data$kelas))
# Scatter plot of the test rows in the LD1/LD2 plane, coloured by true class.
lda_scatter <- ggplot(plot_df, aes(x = LD1, y = LD2, color = kelas))
lda_scatter +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  labs(title = "LDA Plot", x = "LD1", y = "LD2")

# Classify the test rows with the fitted discriminant model and print the
# predicted class labels.
predicted <- predict(linearDA, newdata = test_data)
predicted[["class"]]
## [1] 2 2 0 2 0 2 0 2 2 0 2 2 2 2 2 2 2 2 2 2 2 0 0 2 2 2 2 2 2 2 2 0 2 2 2 0 0
## [38] 2 0 2 2 2 2 2 0 2 2 1 0 2 2 2 0 2 0 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2
## [75] 0 0 0 2 2 2 2 0 0 2 0 0 0 0 2 2 0 0 2 2 2 0 2 0 0 0 0 1 2 0 2 0 2 2 2 0 0
## [112] 0 2 1 2 2 0 2 0 0 2 2 2 0 0 2 2 2 0 2 0 0 2 2 2 0 1 0 2 2 0 0 0 1 2 2 2 2
## [149] 1 0 2 2 2 2 0 0 2 2 2 2 2 2 2 0 2 2 2 2 0 2 2 0 2 0 2 2 0 2 0 0 2 0 0 0 0
## [186] 2 0 2 2 0 0 2 2 0 0 0 2 2 0 0 0 2 2 2 2 2 0 2 2 0 2 2 2 0 0 2 2 1 1 2 2 0
## [223] 0 2 0 0 0 2 2 2 0 0 0 2 0 0 0 0 2 1 2 2 2 0 2 2 1 2 2 2 2 2 2 2 2 0 0 0 2
## [260] 2 1 2 0 2 2 0 2 0 2 2 2 2 1 2 2 2 2 2 2 0 2 1 2 1 0 0 2 2 2 0 0 2 0 0 2 0
## [297] 2 2 2 1 2 2 2 0 2 2 1 0 2 2 0 2 2 2 2 0 0 0 1 2 0 2 0 0 1 2 2 2 2 2 2 2 1
## [334] 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 2 2 2 2 2
## [371] 2 2 2 2 2 2 2 2 0 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 0 2 2 2 2 0 2 2 0 2 2
## [408] 1 2 2 2 1 1 0 2 2 2 2 2 2 2 2 2 1 2 2 2 0 2 2 2 2 1 2 2 2 2 0 2 2 2 2 2 2
## [445] 2 1 2 2 2 2 2 1 2 2 1 2 2 2 1 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 0 2 2
## [482] 2 1 2 2 0 2 2 2 0 2 1 2 2 1 1 2 2 2 2 1 0 1 1 2 2 0 0 2 0 2 2 2 1 2 1 2 1
## [519] 0 0 2 2 2 2 2 1 2 0 2 2 2 2 2 0 0 1 2 1 2 0 0 0 2 0 2 0 2 0 0 2 2 2 0 2 2
## [556] 2 2 2 1 1 2 0 2 2 2 1 1 2 0 0 1 1 2 0 2 0 2 0 1 2 1 0 0 2 0 0 0 0 1 0 2 1
## [593] 2 2 2 2 2 0 2 1 2 2 0 1 2 1 2 1 2 0 0 2 1 2 2 0 0 0 2 2 2 0 0 1 2 2 2 2 1
## [630] 2 1 2 2 2 2 2 2 2 0 0 0 0 1 1 2 2 2 2 2 0 1 2 2 2 0 2 2 2 0 2 2 1 2 0 1 0
## [667] 2 2 0 1 0 2 1 0 2 1 1 0 2 1 2 2 2 1 1 2 0 0 2 1 2 1 1 2 2 2 0 2 2 2 0 1 2
## [704] 2 1 2 2 0 2 2 2 0 2 2 0 2 2 0 0 2 2 2 2 0 2 2 1 2 1 2 2 0 2 2 0 2 0 1 2 2
## [741] 2 2 2 2 2 0 2 0 1 2 2 1 2 2 2 2 2 2 1 0 2 2 2 1 2 2 2 0 0 2 0 2 2 2 0 0 2
## [778] 2 0 0 2 2 0 1 2 2 2 0 2 2 2 2 1 0 0 2 2 2 1 2 2 0 0 2 1 0 1 1 2 2 0 2 2 2
## [815] 2 2 1 2 1 2 2 2 0 0 1 0 1 0 2 1 2 2 2 2 1 0 2 1 0 2 0 2 2 2 0 2 2 2 0 2 2
## [852] 2 1 0 2 0 2 1 2 2 2 2 2 2 2 0 2 2 2 1 2 2 2 2 2 0 0 2 2 1 0 1 0 1 1 0 1 2
## [889] 0 2 0 0 0 0 1 1 0 0 1 0 1 1 1 1 0 1 2 0 0 0 0 0 0 0 0 2 1 1 0 0 0 0 2 0 0
## [926] 2 0 2 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 2 2 1 0 0 2 0 0 1 1 1 1 0 0 0 1 1 0 2
## [963] 1 0 1 1 1 2 1 0 0 0 0 2 1 2 1 0 0 0 1 1 1 0 0 1 0 0 1 1 0 0 0 1 1 0 1 0 0
## [1000] 1 1 0 0 0 1 1 0 0 0 2 1 1 1 2 0 0 2 0 0 1 1 1 0 0 0 0 0 1 1 1 2 0 0 1 1 0
## [1037] 1 0 0 0 0 0 0 1 1 1 0 1 1 0 0 0 0 0 1 0 1 0 2 0 1 1 0 1 0 0 1 0 0 0 1 0 0
## [1074] 0 1 0 1 0 2 2 0 0 1 0 0 0 1 1 0 0 0 1 1 1 1 1 2 1 1 1 0 1 0 2 0 1 0 2 1 0
## [1111] 0 1 0 0 2 0 0 1 1 0 0 1 1 0 0 1 2 1 0 1 0 1 0 1 0 0 0 1 1 2 1 0 0 0 0 1 0
## [1148] 1 1 1 1 2 1 0 1 1 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 2 1 1 0 0 1 0 0 0 0 0 0 0
## [1185] 1 0 2 0 1 0 1 1 1 0 0 0 2 0 1 2 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0
## [1222] 1 1 1 0 0 1 2 0 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 1 1 0 0
## [1259] 0 1 1 0 1 1 0 0 0 1 0 0 0 1 1 2 0 0 0 0 0 2 0 1 0 0 2 0 0 0 0 1 0 1 1 0 0
## [1296] 0 2 0 1 0 0 1 0 0 0 1 1 0 1 0 0 0 2 0 0 0 0 0 1 0 0 0 0 2 2 0 0 0 0 0 0 0
## [1333] 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 1 2 0 0 0 0 0
## [1370] 0 0 0 0 0 2 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 2 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0
## [1407] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 2 0 1 0 0 0 1 0 0 0 1 0 0 0
## [1444] 1 0 0 0 0 0 0 0 0 2 0 1 1 0 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1
## [1481] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 0
## [1518] 0 2 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0
## [1555] 1 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 1 0 0 1 1 2 0 0 0 0 0 1 0 0 0
## [1592] 1 0 0 1 1 1 0 1 0 1 0 1 1 1 1 0 0 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 0
## [1629] 1 0 0 0 1 0 1 0 0 1 1 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0
## [1666] 0 0 0 1 1 1 1 0 1 0 0 2 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1
## [1703] 0 1 0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1
## [1740] 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1
## [1777] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0
## [1814] 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1851] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
## [1888] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 1 2 0 1 0 2 0
## [1925] 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0
## [1962] 0 0 0 0 0 0 0 1 0 0 0 2 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0
## [1999] 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
## [2036] 1 1 0 0 1 1 0 0 1 1 0 0 1 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [2073] 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1
## [2110] 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0
## [2147] 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 1
## [2184] 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0
## [2221] 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1
## Levels: 0 1 2
# Confusion matrix of true vs. predicted class on the test rows
# (rows = actual, columns = predicted).
conf_matrix <- table(actual = test_data$kelas, predicted = predicted$class)
conf_matrix
## predicted
## actual 0 1 2
## 0 1107 36 0
## 1 31 348 142
## 2 10 103 464
print(conf_matrix)
## predicted
## actual 0 1 2
## 0 1107 36 0
## 1 31 348 142
## 2 10 103 464
# Hit ratio (accuracy): correctly classified rows on the diagonal divided by
# the total number of test rows.
n_correct <- sum(diag(conf_matrix))
hit_ratio <- n_correct / sum(conf_matrix)
cat("Hit Ratio (Akurasi):", hit_ratio, "\n")
## Hit Ratio (Akurasi): 0.8563141
# === BIPLOT ===
# Overlay the five features with the largest discriminant coefficients
# (Euclidean norm over LD1/LD2) as arrows on the test-set score plot.
scaling_df <- as.data.frame(lda_model$scaling)
scaling_df$variable <- rownames(scaling_df)
# Plain if/else instead of ifelse(): with a length-1 test, ifelse() returns
# only the FIRST element of `yes`. Every row therefore received the first
# variable's LD2 value, which reduced the ranking to |LD1| alone and gave all
# arrows the same LD2 end point.
has_ld2 <- "LD2" %in% colnames(scaling_df)
ld2_coef <- if (has_ld2) scaling_df$LD2 else rep(0, nrow(scaling_df))
scaling_df$contribution <- sqrt(scaling_df$LD1^2 + ld2_coef^2)
scaling_sorted <- scaling_df[order(-scaling_df$contribution), ]
# head() avoids NA-filled rows when fewer than five features are available.
top_scaling_df <- head(scaling_sorted, 5)
# Stretch the arrows so they are visible at the scale of the scores.
arrow_scale <- 5
top_scaling_df$LD1 <- top_scaling_df$LD1 * arrow_scale
top_scaling_df$LD2 <- if (has_ld2) top_scaling_df$LD2 * arrow_scale else 0
ggplot(plot_df, aes(x = LD1, y = LD2, color = kelas)) +
  geom_point(alpha = 0.6, size = 2) +
  # The arrow and label layers use their own data; they must not inherit the
  # `color = kelas` mapping, which does not exist in `top_scaling_df`.
  geom_segment(data = top_scaling_df,
               aes(x = 0, y = 0, xend = LD1, yend = LD2),
               arrow = arrow(length = unit(0.2, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text_repel(data = top_scaling_df,
                  aes(x = LD1, y = LD2, label = variable),
                  size = 3.5,
                  color = "black",
                  inherit.aes = FALSE) +
  theme_minimal() +
  labs(title = "LDA Biplot (Top 5 Fitur)", x = "LD1", y = "LD2")
