Packages
library(readxl)
library(writexl)
library(ggplot2)
library(cowplot)
library(corrplot)
## corrplot 0.92 loaded
library(lattice)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
library(ROCR)
library(ResourceSelection)
## ResourceSelection 0.3-5 2019-07-22
Pre-Processing
# Point the session at the project data folder; the relative file names used
# by this script are resolved against this directory.
setwd("D:/MY COLLEGE/SEMESTER 6/TPM/DATA")
# Import the first worksheet of the raw workbook, then inspect its structure.
data <- read_excel(
  path = "Data Kelompok 2 - Regresi Logistik.xlsx",
  sheet = 1
)
str(data)
## tibble [1,340 × 21] (S3: tbl_df/tbl/data.frame)
## $ Name : chr [1:1340] "David Robinson*" "Michael Jordan*" "Blake Griffin" "Alonzo Mourning*" ...
## $ GP : num [1:1340] 82 82 82 78 82 70 76 72 82 81 ...
## $ MIN : num [1:1340] 36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
## $ PTS : num [1:1340] NA 28.2 NA NA 21.6 19.9 23.5 19.2 22.9 NA ...
## $ FGM : num [1:1340] 8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
## $ FGA : num [1:1340] 15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
## $ FG% : num [1:1340] 53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
## $ 3P Made : num [1:1340] 0 NA 0.1 0 NA 0.1 2 NA NA 0 ...
## $ 3PA : num [1:1340] 0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
## $ 3P% : num [1:1340] 0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
## $ FTM : num [1:1340] 7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
## $ FTA : num [1:1340] 10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
## $ FT% : num [1:1340] 73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
## $ OREB : num [1:1340] 3.7 2 3.3 3.4 2.7 NA 1.5 1.3 2.1 2.1 ...
## $ DREB : num [1:1340] 8.3 4.5 8.8 6.9 2.7 4.6 2.6 NA 2.7 6.6 ...
## $ REB : num [1:1340] 12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
## $ AST : num [1:1340] 2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
## $ STL : num [1:1340] 1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
## $ BLK : num [1:1340] 3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
## $ TOV : num [1:1340] 3.1 NA 2.7 3 NA 2.9 4.4 NA NA 3.4 ...
## $ TARGET_5Yrs: num [1:1340] 1 1 1 1 1 1 1 1 1 1 ...
# Treat the 0/1 outcome as a categorical variable.
data[["TARGET_5Yrs"]] <- as.factor(data[["TARGET_5Yrs"]])
# Discard the first column (Name) and keep the remaining columns for analysis.
data.use <- data[, -1]
head(data.use)
## # A tibble: 6 × 20
## GP MIN PTS FGM FGA `FG%` `3P Made` `3PA` `3P%` FTM FTA `FT%`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 82 36.6 NA 8.4 15.9 53.1 0 0 0 7.5 10.2 73.2
## 2 82 38.3 28.2 10.2 19.8 51.5 NA 0.6 17.3 7.7 9.1 84.5
## 3 82 38 NA 8.5 16.8 50.6 0.1 0.3 29.2 5.4 8.5 64.2
## 4 78 33.9 NA 7.3 14.3 51.1 0 0 0 6.3 8.1 78.1
## 5 82 37.5 21.6 7.8 15.6 49.6 NA 0.3 22.7 6 7.6 79.7
## 6 70 38.3 19.9 7.3 15.2 47.7 0.1 0.4 14.8 5.3 7.3 73.2
## # … with 8 more variables: OREB <dbl>, DREB <dbl>, REB <dbl>, AST <dbl>,
## # STL <dbl>, BLK <dbl>, TOV <dbl>, TARGET_5Yrs <fct>
# Per-column summary statistics; the NA's row reports missing values.
summary(data.use)
## GP MIN PTS FGM
## Min. :11.00 Min. : 3.10 Min. : 0.700 Min. : 0.300
## 1st Qu.:48.00 1st Qu.:11.32 1st Qu.: 3.600 1st Qu.: 1.400
## Median :64.00 Median :16.70 Median : 5.400 Median : 2.100
## Mean :61.22 Mean :18.16 Mean : 6.557 Mean : 2.633
## 3rd Qu.:78.00 3rd Qu.:23.50 3rd Qu.: 8.500 3rd Qu.: 3.400
## Max. :82.00 Max. :40.90 Max. :28.200 Max. :10.200
## NA's :89 NA's :82 NA's :32 NA's :17
## FGA FG% 3P Made 3PA
## Min. : 0.800 Min. : 23.80 Min. :0.0000 Min. :0.0000
## 1st Qu.: 3.300 1st Qu.: 40.20 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 4.800 Median : 44.20 Median :0.1000 Median :0.3000
## Mean : 5.885 Mean : 46.41 Mean :0.2353 Mean :0.7792
## 3rd Qu.: 7.500 3rd Qu.: 48.10 3rd Qu.:0.3000 3rd Qu.:1.2000
## Max. :19.800 Max. :342.60 Max. :2.3000 Max. :6.5000
## NA's :54
## 3P% FTM FTA FT%
## Min. : 0.00 Min. :0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.:0.600 1st Qu.: 0.900 1st Qu.: 64.88
## Median : 22.40 Median :1.000 Median : 1.500 Median : 71.40
## Mean : 19.31 Mean :1.298 Mean : 1.815 Mean : 72.84
## 3rd Qu.: 32.50 3rd Qu.:1.600 3rd Qu.: 2.300 3rd Qu.: 77.72
## Max. :100.00 Max. :7.700 Max. :10.200 Max. :454.70
## NA's :11 NA's :41
## OREB DREB REB AST
## Min. :0.0000 Min. :0.200 Min. : 0.300 Min. : 0.000
## 1st Qu.:0.4000 1st Qu.:1.000 1st Qu.: 1.500 1st Qu.: 0.600
## Median :0.8000 Median :1.700 Median : 2.500 Median : 1.100
## Mean :0.9961 Mean :2.004 Mean : 3.034 Mean : 1.551
## 3rd Qu.:1.4000 3rd Qu.:2.600 3rd Qu.: 4.000 3rd Qu.: 2.000
## Max. :5.3000 Max. :9.600 Max. :13.900 Max. :10.600
## NA's :23 NA's :133
## STL BLK TOV TARGET_5Yrs
## Min. :0.0000 Min. :0.0000 Min. :0.10 0:509
## 1st Qu.:0.3000 1st Qu.:0.1000 1st Qu.:0.60 1:831
## Median :0.5000 Median :0.2000 Median :0.90
## Mean :0.6185 Mean :0.3686 Mean :1.06
## 3rd Qu.:0.8000 3rd Qu.:0.5000 3rd Qu.:1.30
## Max. :2.5000 Max. :3.9000 Max. :4.40
## NA's :138
Pendeteksian Missing Value
# Count the missing values (NA) in every column.
colSums(is.na(data.use))
## GP MIN PTS FGM FGA FG%
## 89 82 32 17 0 0
## 3P Made 3PA 3P% FTM FTA FT%
## 54 0 11 0 41 0
## OREB DREB REB AST STL BLK
## 23 133 0 0 0 0
## TOV TARGET_5Yrs
## 138 0
Eksplorasi Data
# Inspect outliers
boxplot(data.use, horizontal = TRUE)
# Median of every numeric column. apply() would first coerce the whole data
# frame (including the TARGET_5Yrs factor) to a character matrix, so median()
# would warn and return NA or strings. Iterating over the numeric columns
# keeps each column's own type and yields a named numeric vector.
vapply(Filter(is.numeric, data.use), median, numeric(1), na.rm = TRUE)
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
## GP MIN PTS FGM FGA FG%
## "64" NA NA " 2.1" NA NA
## 3P Made 3PA 3P% FTM FTA FT%
## NA NA " 22.4" NA " 1.5" NA
## OREB DREB REB AST STL BLK
## "0.8" "1.7" NA NA NA NA
## TOV TARGET_5Yrs
## NA NA
Penanganan Missing Value
# Replace NA values with the column median (median imputation).
# The same rule applies to every column that contains missing values, so it is
# written once and applied across the listed columns instead of being repeated
# per column. Column order and all non-missing values are left unchanged.
data.use <- data.use %>%
  mutate(across(
    all_of(c(
      "GP", "MIN", "PTS", "FGM", "3P Made", "3P%",
      "FTA", "OREB", "DREB", "TOV"
    )),
    function(x) replace(x, is.na(x), median(x, na.rm = TRUE))
  ))
Penyimpanan Data
Data yang telah dibersihkan disimpan terlebih dahulu sebelum melanjutkan ke tahap selanjutnya.
# Save the cleaned data. The Excel copy is kept, and a CSV copy is written as
# well because the next step reloads "Data Aman.csv" with read.csv(); without
# this line the script never produces that file.
write_xlsx(data.use, "Data Aman.xlsx")
write.csv(data.use, "Data Aman.csv", row.names = FALSE)
Deklarasi Cleaned Data
# Reload the cleaned data set from disk and preview it as a tibble.
dataku <- read.csv(file = "D:/MY COLLEGE/SEMESTER 6/TPM/DATA/Data Aman.csv")
tibble::as_tibble(dataku)
## # A tibble: 1,340 × 20
## GP MIN PTS FGM FGA FG. X3P.Made X3PA X3P. FTM FTA FT.
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 82 36.6 5.4 8.4 15.9 53.1 0 0 0 7.5 10.2 73.2
## 2 82 38.3 28.2 10.2 19.8 51.5 0.1 0.6 17.3 7.7 9.1 84.5
## 3 82 38 5.4 8.5 16.8 50.6 0.1 0.3 29.2 5.4 8.5 64.2
## 4 78 33.9 5.4 7.3 14.3 51.1 0 0 0 6.3 8.1 78.1
## 5 82 37.5 21.6 7.8 15.6 49.6 0.1 0.3 22.7 6 7.6 79.7
## 6 70 38.3 19.9 7.3 15.2 47.7 0.1 0.4 14.8 5.3 7.3 73.2
## 7 76 40.1 23.5 8.2 19.8 41.6 2 6 34.1 5 7.2 70.2
## 8 72 37.5 19.2 6.3 15.2 41.4 0.1 4.1 31.8 5.4 7.2 74.7
## 9 82 37.4 22.9 9 19.7 45.5 0.1 1.1 21.3 4.7 6.9 68.4
## 10 81 34.9 5.4 6.2 13.1 47.4 0 0.5 10 5.7 6.8 83.5
## # … with 1,330 more rows, and 8 more variables: OREB <dbl>, DREB <dbl>,
## # REB <dbl>, AST <dbl>, STL <dbl>, BLK <dbl>, TOV <dbl>, TARGET_5Yrs <int>
# Inspect the structure (column names and types) of the reloaded data frame.
str(dataku)
## 'data.frame': 1340 obs. of 20 variables:
## $ GP : num 82 82 82 78 82 70 76 72 82 81 ...
## $ MIN : num 36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
## $ PTS : num 5.4 28.2 5.4 5.4 21.6 19.9 23.5 19.2 22.9 5.4 ...
## $ FGM : num 8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
## $ FGA : num 15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
## $ FG. : num 53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
## $ X3P.Made : num 0 0.1 0.1 0 0.1 0.1 2 0.1 0.1 0 ...
## $ X3PA : num 0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
## $ X3P. : num 0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
## $ FTM : num 7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
## $ FTA : num 10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
## $ FT. : num 73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
## $ OREB : num 3.7 2 3.3 3.4 2.7 0.8 1.5 1.3 2.1 2.1 ...
## $ DREB : num 8.3 4.5 8.8 6.9 2.7 4.6 2.6 1.7 2.7 6.6 ...
## $ REB : num 12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
## $ AST : num 2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
## $ STL : num 1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
## $ BLK : num 3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
## $ TOV : num 3.1 0.9 2.7 3 0.9 2.9 4.4 0.9 0.9 3.4 ...
## $ TARGET_5Yrs: int 1 1 1 1 1 1 1 1 1 1 ...
Pengubahan Format Data
# Convert any character column to numeric. across() + where() replaces the
# superseded mutate_if() scoped verb; columns that are already numeric are
# left untouched.
dataku <- dataku %>%
  mutate(across(where(is.character), as.numeric))
str(dataku)
## 'data.frame': 1340 obs. of 20 variables:
## $ GP : num 82 82 82 78 82 70 76 72 82 81 ...
## $ MIN : num 36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
## $ PTS : num 5.4 28.2 5.4 5.4 21.6 19.9 23.5 19.2 22.9 5.4 ...
## $ FGM : num 8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
## $ FGA : num 15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
## $ FG. : num 53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
## $ X3P.Made : num 0 0.1 0.1 0 0.1 0.1 2 0.1 0.1 0 ...
## $ X3PA : num 0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
## $ X3P. : num 0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
## $ FTM : num 7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
## $ FTA : num 10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
## $ FT. : num 73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
## $ OREB : num 3.7 2 3.3 3.4 2.7 0.8 1.5 1.3 2.1 2.1 ...
## $ DREB : num 8.3 4.5 8.8 6.9 2.7 4.6 2.6 1.7 2.7 6.6 ...
## $ REB : num 12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
## $ AST : num 2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
## $ STL : num 1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
## $ BLK : num 3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
## $ TOV : num 3.1 0.9 2.7 3 0.9 2.9 4.4 0.9 0.9 3.4 ...
## $ TARGET_5Yrs: int 1 1 1 1 1 1 1 1 1 1 ...
Splitting Data
# Fix the RNG seed so the 80/20 split is reproducible.
set.seed(1704)
in.train <- caret::createDataPartition(
  y = as.factor(dataku$TARGET_5Yrs),
  p = 0.8,
  list = FALSE
)
# Rows selected by the partition form the training set; the rest are held out.
dataku.train <- dataku[in.train, ]
dataku.test <- dataku[-in.train, ]
Pemodelan & Seleksi Peubah
# Full logistic regression: every other column is a candidate predictor.
dataku.glm0 <- glm(
  formula = TARGET_5Yrs ~ .,
  family = binomial,
  data = dataku.train
)
# AIC-based variable selection starting from the full model.
dataku.glm.step <- step(object = dataku.glm0)
## Start: AIC=1248.76
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + FT. + OREB + DREB + REB + AST + STL +
## BLK + TOV
##
## Df Deviance AIC
## - FT. 1 1208.8 1246.8
## - REB 1 1208.8 1246.8
## - DREB 1 1208.9 1246.9
## - TOV 1 1208.9 1246.9
## - FG. 1 1209.0 1247.0
## - STL 1 1209.0 1247.0
## - X3P. 1 1209.2 1247.2
## - X3P.Made 1 1209.5 1247.5
## - FGA 1 1209.6 1247.6
## - FGM 1 1209.6 1247.6
## - X3PA 1 1209.7 1247.7
## - MIN 1 1210.5 1248.5
## - PTS 1 1210.6 1248.6
## <none> 1208.8 1248.8
## - BLK 1 1211.2 1249.2
## - FTM 1 1212.8 1250.8
## - AST 1 1213.0 1251.0
## - OREB 1 1213.2 1251.2
## - FTA 1 1213.6 1251.6
## - GP 1 1246.8 1284.8
##
## Step: AIC=1246.78
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + DREB + REB + AST + STL + BLK +
## TOV
##
## Df Deviance AIC
## - REB 1 1208.8 1244.8
## - DREB 1 1208.9 1244.9
## - TOV 1 1209.0 1245.0
## - FG. 1 1209.0 1245.0
## - STL 1 1209.0 1245.0
## - X3P. 1 1209.2 1245.2
## - X3P.Made 1 1209.5 1245.5
## - FGA 1 1209.7 1245.7
## - FGM 1 1209.7 1245.7
## - X3PA 1 1209.7 1245.7
## - MIN 1 1210.5 1246.5
## - PTS 1 1210.7 1246.7
## <none> 1208.8 1246.8
## - BLK 1 1211.2 1247.2
## - AST 1 1213.0 1249.0
## - OREB 1 1213.2 1249.2
## - FTM 1 1213.3 1249.3
## - FTA 1 1214.1 1250.1
## - GP 1 1246.8 1282.8
##
## Step: AIC=1244.81
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + DREB + AST + STL + BLK + TOV
##
## Df Deviance AIC
## - TOV 1 1209.0 1243.0
## - FG. 1 1209.0 1243.0
## - STL 1 1209.0 1243.0
## - DREB 1 1209.0 1243.0
## - X3P. 1 1209.2 1243.2
## - X3P.Made 1 1209.6 1243.6
## - FGM 1 1209.7 1243.7
## - FGA 1 1209.7 1243.7
## - X3PA 1 1209.7 1243.7
## - MIN 1 1210.5 1244.5
## - PTS 1 1210.7 1244.7
## <none> 1208.8 1244.8
## - BLK 1 1211.3 1245.3
## - AST 1 1213.0 1247.0
## - FTM 1 1213.3 1247.3
## - FTA 1 1214.1 1248.1
## - OREB 1 1220.9 1254.9
## - GP 1 1247.0 1281.0
##
## Step: AIC=1242.98
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + DREB + AST + STL + BLK
##
## Df Deviance AIC
## - DREB 1 1209.2 1241.2
## - STL 1 1209.2 1241.2
## - FG. 1 1209.2 1241.2
## - X3P. 1 1209.4 1241.4
## - FGM 1 1209.8 1241.8
## - FGA 1 1209.8 1241.8
## - X3P.Made 1 1210.0 1242.0
## - X3PA 1 1210.3 1242.3
## - MIN 1 1210.7 1242.7
## <none> 1209.0 1243.0
## - PTS 1 1211.0 1243.0
## - BLK 1 1211.5 1243.5
## - FTM 1 1213.4 1245.4
## - FTA 1 1214.1 1246.1
## - AST 1 1214.4 1246.4
## - OREB 1 1220.9 1252.9
## - GP 1 1248.4 1280.4
##
## Step: AIC=1241.15
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + AST + STL + BLK
##
## Df Deviance AIC
## - STL 1 1209.3 1239.3
## - FG. 1 1209.4 1239.4
## - X3P. 1 1209.5 1239.5
## - FGM 1 1209.9 1239.9
## - FGA 1 1210.0 1240.0
## - X3P.Made 1 1210.1 1240.1
## - X3PA 1 1210.4 1240.4
## <none> 1209.2 1241.2
## - BLK 1 1211.5 1241.5
## - MIN 1 1211.7 1241.7
## - PTS 1 1211.7 1241.7
## - FTM 1 1213.7 1243.7
## - FTA 1 1214.5 1244.5
## - AST 1 1214.7 1244.7
## - OREB 1 1222.7 1252.7
## - GP 1 1248.4 1278.4
##
## Step: AIC=1239.35
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + AST + BLK
##
## Df Deviance AIC
## - FG. 1 1209.5 1237.5
## - X3P. 1 1209.7 1237.7
## - FGM 1 1210.1 1238.1
## - FGA 1 1210.2 1238.2
## - X3P.Made 1 1210.3 1238.3
## - X3PA 1 1210.6 1238.6
## <none> 1209.3 1239.3
## - BLK 1 1211.7 1239.7
## - PTS 1 1211.8 1239.8
## - MIN 1 1212.1 1240.1
## - FTM 1 1214.0 1242.0
## - FTA 1 1214.8 1242.8
## - AST 1 1215.3 1243.3
## - OREB 1 1222.7 1250.7
## - GP 1 1248.5 1276.5
##
## Step: AIC=1237.54
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + X3P.Made + X3PA +
## X3P. + FTM + FTA + OREB + AST + BLK
##
## Df Deviance AIC
## - X3P. 1 1209.9 1235.9
## - FGM 1 1210.2 1236.2
## - FGA 1 1210.2 1236.2
## - X3P.Made 1 1210.5 1236.5
## - X3PA 1 1210.8 1236.8
## <none> 1209.5 1237.5
## - BLK 1 1211.9 1237.9
## - PTS 1 1212.0 1238.0
## - MIN 1 1212.2 1238.2
## - FTM 1 1214.3 1240.3
## - FTA 1 1215.2 1241.2
## - AST 1 1215.4 1241.4
## - OREB 1 1222.7 1248.7
## - GP 1 1249.0 1275.0
##
## Step: AIC=1235.9
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + X3P.Made + X3PA +
## FTM + FTA + OREB + AST + BLK
##
## Df Deviance AIC
## - FGM 1 1210.6 1234.6
## - FGA 1 1210.6 1234.6
## - X3PA 1 1211.1 1235.1
## - X3P.Made 1 1211.1 1235.1
## <none> 1209.9 1235.9
## - BLK 1 1212.1 1236.1
## - PTS 1 1212.3 1236.3
## - MIN 1 1212.6 1236.6
## - FTM 1 1215.0 1239.0
## - AST 1 1215.8 1239.8
## - FTA 1 1215.9 1239.9
## - OREB 1 1222.8 1246.8
## - GP 1 1249.3 1273.3
##
## Step: AIC=1234.57
## TARGET_5Yrs ~ GP + MIN + PTS + FGA + X3P.Made + X3PA + FTM +
## FTA + OREB + AST + BLK
##
## Df Deviance AIC
## - FGA 1 1210.6 1232.6
## - X3P.Made 1 1212.1 1234.1
## <none> 1210.6 1234.6
## - X3PA 1 1212.6 1234.6
## - BLK 1 1213.0 1235.0
## - MIN 1 1213.2 1235.2
## - PTS 1 1214.2 1236.2
## - FTM 1 1215.6 1237.6
## - AST 1 1216.4 1238.4
## - FTA 1 1216.4 1238.4
## - OREB 1 1224.7 1246.7
## - GP 1 1251.0 1273.0
##
## Step: AIC=1232.63
## TARGET_5Yrs ~ GP + MIN + PTS + X3P.Made + X3PA + FTM + FTA +
## OREB + AST + BLK
##
## Df Deviance AIC
## - X3P.Made 1 1212.2 1232.2
## <none> 1210.6 1232.6
## - X3PA 1 1212.8 1232.8
## - BLK 1 1213.1 1233.1
## - MIN 1 1213.7 1233.7
## - FTM 1 1215.6 1235.6
## - PTS 1 1216.0 1236.0
## - AST 1 1216.4 1236.4
## - FTA 1 1216.5 1236.5
## - OREB 1 1224.7 1244.7
## - GP 1 1251.2 1271.2
##
## Step: AIC=1232.18
## TARGET_5Yrs ~ GP + MIN + PTS + X3PA + FTM + FTA + OREB + AST +
## BLK
##
## Df Deviance AIC
## - X3PA 1 1212.8 1230.8
## <none> 1212.2 1232.2
## - BLK 1 1214.6 1232.6
## - MIN 1 1215.0 1233.0
## - AST 1 1217.0 1235.0
## - FTM 1 1217.1 1235.1
## - PTS 1 1217.4 1235.4
## - FTA 1 1218.2 1236.2
## - OREB 1 1225.9 1243.9
## - GP 1 1254.7 1272.7
##
## Step: AIC=1230.76
## TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK
##
## Df Deviance AIC
## <none> 1212.8 1230.8
## - BLK 1 1215.2 1231.2
## - MIN 1 1217.1 1233.1
## - PTS 1 1217.4 1233.4
## - FTM 1 1217.6 1233.6
## - AST 1 1218.0 1234.0
## - FTA 1 1218.4 1234.4
## - OREB 1 1231.8 1247.8
## - GP 1 1257.5 1273.5
# Coefficient table and fit statistics of the model selected by step().
summary(dataku.glm.step)
##
## Call:
## glm(formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB +
## AST + BLK, family = binomial, data = dataku.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5155 -1.0281 0.5267 0.9067 2.2067
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.301880 0.285271 -8.069 7.08e-16 ***
## GP 0.033123 0.005066 6.539 6.20e-11 ***
## MIN -0.046123 0.022075 -2.089 0.0367 *
## PTS 0.099187 0.044762 2.216 0.0267 *
## FTM 0.914006 0.429623 2.127 0.0334 *
## FTA -0.731323 0.315856 -2.315 0.0206 *
## OREB 0.789246 0.188015 4.198 2.70e-05 ***
## AST 0.194997 0.087549 2.227 0.0259 *
## BLK 0.420820 0.274213 1.535 0.1249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1425.3 on 1072 degrees of freedom
## Residual deviance: 1212.8 on 1064 degrees of freedom
## AIC: 1230.8
##
## Number of Fisher Scoring iterations: 5
# Refit the selected model with its formula written out explicitly.
dataku.glm.final <- glm(
  formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK,
  family = binomial,
  data = dataku.train
)
summary(object = dataku.glm.final)
##
## Call:
## glm(formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB +
## AST + BLK, family = binomial, data = dataku.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5155 -1.0281 0.5267 0.9067 2.2067
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.301880 0.285271 -8.069 7.08e-16 ***
## GP 0.033123 0.005066 6.539 6.20e-11 ***
## MIN -0.046123 0.022075 -2.089 0.0367 *
## PTS 0.099187 0.044762 2.216 0.0267 *
## FTM 0.914006 0.429623 2.127 0.0334 *
## FTA -0.731323 0.315856 -2.315 0.0206 *
## OREB 0.789246 0.188015 4.198 2.70e-05 ***
## AST 0.194997 0.087549 2.227 0.0259 *
## BLK 0.420820 0.274213 1.535 0.1249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1425.3 on 1072 degrees of freedom
## Residual deviance: 1212.8 on 1064 degrees of freedom
## AIC: 1230.8
##
## Number of Fisher Scoring iterations: 5
Interpretasi Koefisien Model
# Exponentiate the coefficients to read them as odds ratios.
exp(coef(dataku.glm.final))
## (Intercept) GP MIN PTS FTM FTA
## 0.1000705 1.0336781 0.9549247 1.1042732 2.4942937 0.4812719
## OREB AST BLK
## 2.2017353 1.2153074 1.5232105
Uji Parameter Model
# Intercept-only model (no x variables), used as the baseline for the tests
modeltanpax <- glm(
  TARGET_5Yrs ~ 1,
  data = dataku.train,
  family = binomial
)
Uji Deviance (simultan)
# Several equivalent ways of computing the G2 test statistic in R
# 1) difference between the deviances of the null and the final model
deviance(modeltanpax) - deviance(dataku.glm.final)
## [1] 212.5711
# p-value of the G2 statistic. Ask pchisq() for the upper tail directly:
# 1 - pchisq(...) cancels to exactly 0 for a statistic this large, whereas
# lower.tail = FALSE returns the (very small) tail probability itself.
pchisq(
  dataku.glm.final$null.deviance - dataku.glm.final$deviance,
  df = dataku.glm.final$df.null - dataku.glm.final$df.residual,
  lower.tail = FALSE
)
## [1] 0
# Critical chi-square value at the 5% significance level.
qchisq(
  p = 1 - 0.05,
  df = dataku.glm.final$df.null - dataku.glm.final$df.residual
)
## [1] 15.50731
# G2 again, this time from the null deviance stored in the fitted model.
with(dataku.glm.final, null.deviance - deviance)
## [1] 212.5711
# G2 expressed through the log-likelihoods of the two models.
-2 * logLik(modeltanpax) - (-2 * logLik(dataku.glm.final))
## 'log Lik.' 212.5711 (df=1)
Uji Rasio Likelihood (pembandingan)
# Likelihood ratio test of the final model against the intercept-only model.
lmtest::lrtest(modeltanpax, dataku.glm.final)
## Likelihood ratio test
##
## Model 1: TARGET_5Yrs ~ 1
## Model 2: TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 1 -712.67
## 2 9 -606.38 8 212.57 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Uji Wald (Parsial)
# Partial (per-coefficient) Wald tests. The argument is spelled out as
# test.statistic rather than relying on partial matching of `test`.
Anova(dataku.glm.final, type = "II", test.statistic = "Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: TARGET_5Yrs
## Df Chisq Pr(>Chisq)
## GP 1 42.7561 6.201e-11 ***
## MIN 1 4.3654 0.03668 *
## PTS 1 4.9102 0.02670 *
## FTM 1 4.5261 0.03338 *
## FTA 1 5.3609 0.02059 *
## OREB 1 17.6215 2.695e-05 ***
## AST 1 4.9608 0.02593 *
## BLK 1 2.3551 0.12487
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Uji Kecocokan Model
# Hosmer-Lemeshow goodness-of-fit test: observed outcome vs fitted
# probabilities, using the default of 10 groups.
hoslem.test(
  x = dataku.train$TARGET_5Yrs,
  y = dataku.glm.final$fitted.values,
  g = 10
)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: dataku.train$TARGET_5Yrs, dataku.glm.final$fitted.values
## X-squared = 6.006, df = 8, p-value = 0.6466
Evaluasi Model
# Classify the training observations with a 0.5 probability cut-off and
# cross-tabulate the predictions against the observed outcome.
fit.final <- fitted(dataku.glm.final)
pred.final <- ifelse(fit.final < 0.5, "BAD", "GOOD")
tab <- table(
  Truth = dataku.train$TARGET_5Yrs,
  Predicted = pred.final
)
tab
## Predicted
## Truth BAD GOOD
## 0 216 192
## 1 117 548
Akurasi
# Accuracy = correctly classified cases (table diagonal) over all cases.
akurasi <- sum(diag(tab)) / sum(tab)
print(akurasi)
## [1] 0.7120224
Confusion Matrix Y Aktual & Prediksi
# Confusion matrix and performance measures on the training data, using a
# 0.5 cut-off on the predicted probability.
pdata <- predict(dataku.glm.final, newdata = dataku.train, type = "response")
y_prediksi <- ifelse(pdata < 0.5, "bad", "good")
y_aktual <- dataku.train$TARGET_5Yrs
# Rows are the actual class (0/1); columns are the predicted class (bad/good).
klf <- table(y_aktual, y_prediksi)
accuracy <- (klf[1, 1] + klf[2, 2]) / sum(klf) * 100
sensitivity <- klf[2, 2] / sum(klf[2, ]) * 100
specificity <- klf[1, 1] / sum(klf[1, ]) * 100
# False positive rate = FP / (FP + TN): actual-0 cases predicted "good" over
# all actual-0 cases. Using klf[2, 1] / (klf[2, 1] + klf[1, 1]) here would give
# the false omission rate instead and inflate the AUC below.
fprate <- klf[1, 2] / sum(klf[1, ]) * 100
# Single-threshold approximation of the AUC: (100 + TPR - FPR) / 2.
AUC <- (100 + sensitivity - fprate) / 2
performa <- data.frame(accuracy, sensitivity, specificity, AUC)
klf
## y_prediksi
## y_aktual bad good
## 0 216 192
## 1 117 548
performa
## accuracy sensitivity specificity AUC
## 1 71.20224 82.40602 52.94118 67.6736
ROC plot
# ROC curve on the hold-out set: score the test rows with the final model
# (default prediction scale of predict() for a glm) and plot TPR against FPR.
pred <- prediction(
  predictions = predict(dataku.glm.final, newdata = dataku.test),
  labels = dataku.test$TARGET_5Yrs
)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)