Afris Setiya Intan Amanda

2/11/2023

Regresi Logistik Biner

Packages

library(readxl)
library(writexl)
library(ggplot2)
library(cowplot)
library(corrplot)
## corrplot 0.92 loaded
library(lattice)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
library(ROCR)
library(ResourceSelection)
## ResourceSelection 0.3-5   2019-07-22

Pre-Processing

setwd("D:/MY COLLEGE/SEMESTER 6/TPM/DATA")
data <- read_excel("Data Kelompok 2 - Regresi Logistik.xlsx", sheet = 1)
str(data)
## tibble [1,340 × 21] (S3: tbl_df/tbl/data.frame)
##  $ Name       : chr [1:1340] "David Robinson*" "Michael Jordan*" "Blake Griffin" "Alonzo Mourning*" ...
##  $ GP         : num [1:1340] 82 82 82 78 82 70 76 72 82 81 ...
##  $ MIN        : num [1:1340] 36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
##  $ PTS        : num [1:1340] NA 28.2 NA NA 21.6 19.9 23.5 19.2 22.9 NA ...
##  $ FGM        : num [1:1340] 8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
##  $ FGA        : num [1:1340] 15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
##  $ FG%        : num [1:1340] 53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
##  $ 3P Made    : num [1:1340] 0 NA 0.1 0 NA 0.1 2 NA NA 0 ...
##  $ 3PA        : num [1:1340] 0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
##  $ 3P%        : num [1:1340] 0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
##  $ FTM        : num [1:1340] 7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
##  $ FTA        : num [1:1340] 10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
##  $ FT%        : num [1:1340] 73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
##  $ OREB       : num [1:1340] 3.7 2 3.3 3.4 2.7 NA 1.5 1.3 2.1 2.1 ...
##  $ DREB       : num [1:1340] 8.3 4.5 8.8 6.9 2.7 4.6 2.6 NA 2.7 6.6 ...
##  $ REB        : num [1:1340] 12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
##  $ AST        : num [1:1340] 2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
##  $ STL        : num [1:1340] 1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
##  $ BLK        : num [1:1340] 3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
##  $ TOV        : num [1:1340] 3.1 NA 2.7 3 NA 2.9 4.4 NA NA 3.4 ...
##  $ TARGET_5Yrs: num [1:1340] 1 1 1 1 1 1 1 1 1 1 ...
data$TARGET_5Yrs <- as.factor(data$TARGET_5Yrs)
data.use <- data[,-1]

head(data.use)
## # A tibble: 6 × 20
##      GP   MIN   PTS   FGM   FGA `FG%` `3P Made` `3PA` `3P%`   FTM   FTA `FT%`
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1    82  36.6  NA     8.4  15.9  53.1       0     0     0     7.5  10.2  73.2
## 2    82  38.3  28.2  10.2  19.8  51.5      NA     0.6  17.3   7.7   9.1  84.5
## 3    82  38    NA     8.5  16.8  50.6       0.1   0.3  29.2   5.4   8.5  64.2
## 4    78  33.9  NA     7.3  14.3  51.1       0     0     0     6.3   8.1  78.1
## 5    82  37.5  21.6   7.8  15.6  49.6      NA     0.3  22.7   6     7.6  79.7
## 6    70  38.3  19.9   7.3  15.2  47.7       0.1   0.4  14.8   5.3   7.3  73.2
## # … with 8 more variables: OREB <dbl>, DREB <dbl>, REB <dbl>, AST <dbl>,
## #   STL <dbl>, BLK <dbl>, TOV <dbl>, TARGET_5Yrs <fct>
summary(data.use)
##        GP             MIN             PTS              FGM        
##  Min.   :11.00   Min.   : 3.10   Min.   : 0.700   Min.   : 0.300  
##  1st Qu.:48.00   1st Qu.:11.32   1st Qu.: 3.600   1st Qu.: 1.400  
##  Median :64.00   Median :16.70   Median : 5.400   Median : 2.100  
##  Mean   :61.22   Mean   :18.16   Mean   : 6.557   Mean   : 2.633  
##  3rd Qu.:78.00   3rd Qu.:23.50   3rd Qu.: 8.500   3rd Qu.: 3.400  
##  Max.   :82.00   Max.   :40.90   Max.   :28.200   Max.   :10.200  
##  NA's   :89      NA's   :82      NA's   :32       NA's   :17      
##       FGA              FG%            3P Made            3PA        
##  Min.   : 0.800   Min.   : 23.80   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 3.300   1st Qu.: 40.20   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 4.800   Median : 44.20   Median :0.1000   Median :0.3000  
##  Mean   : 5.885   Mean   : 46.41   Mean   :0.2353   Mean   :0.7792  
##  3rd Qu.: 7.500   3rd Qu.: 48.10   3rd Qu.:0.3000   3rd Qu.:1.2000  
##  Max.   :19.800   Max.   :342.60   Max.   :2.3000   Max.   :6.5000  
##                                    NA's   :54                       
##       3P%              FTM             FTA              FT%        
##  Min.   :  0.00   Min.   :0.000   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.:  0.00   1st Qu.:0.600   1st Qu.: 0.900   1st Qu.: 64.88  
##  Median : 22.40   Median :1.000   Median : 1.500   Median : 71.40  
##  Mean   : 19.31   Mean   :1.298   Mean   : 1.815   Mean   : 72.84  
##  3rd Qu.: 32.50   3rd Qu.:1.600   3rd Qu.: 2.300   3rd Qu.: 77.72  
##  Max.   :100.00   Max.   :7.700   Max.   :10.200   Max.   :454.70  
##  NA's   :11                       NA's   :41                       
##       OREB             DREB            REB              AST        
##  Min.   :0.0000   Min.   :0.200   Min.   : 0.300   Min.   : 0.000  
##  1st Qu.:0.4000   1st Qu.:1.000   1st Qu.: 1.500   1st Qu.: 0.600  
##  Median :0.8000   Median :1.700   Median : 2.500   Median : 1.100  
##  Mean   :0.9961   Mean   :2.004   Mean   : 3.034   Mean   : 1.551  
##  3rd Qu.:1.4000   3rd Qu.:2.600   3rd Qu.: 4.000   3rd Qu.: 2.000  
##  Max.   :5.3000   Max.   :9.600   Max.   :13.900   Max.   :10.600  
##  NA's   :23       NA's   :133                                      
##       STL              BLK              TOV       TARGET_5Yrs
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.10   0:509      
##  1st Qu.:0.3000   1st Qu.:0.1000   1st Qu.:0.60   1:831      
##  Median :0.5000   Median :0.2000   Median :0.90              
##  Mean   :0.6185   Mean   :0.3686   Mean   :1.06              
##  3rd Qu.:0.8000   3rd Qu.:0.5000   3rd Qu.:1.30              
##  Max.   :2.5000   Max.   :3.9000   Max.   :4.40              
##                                    NA's   :138

Pendeteksian Missing Value

colSums(is.na(data.use))
##          GP         MIN         PTS         FGM         FGA         FG% 
##          89          82          32          17           0           0 
##     3P Made         3PA         3P%         FTM         FTA         FT% 
##          54           0          11           0          41           0 
##        OREB        DREB         REB         AST         STL         BLK 
##          23         133           0           0           0           0 
##         TOV TARGET_5Yrs 
##         138           0

Eksplorasi Data

#Melihat pencilan
boxplot(data.use, horizontal = T)

apply(data.use, 2, median, na.rm = T)
## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA

## Warning in mean.default(sort(x, partial = half + 0L:1L)[half + 0L:1L]): argument
## is not numeric or logical: returning NA
##          GP         MIN         PTS         FGM         FGA         FG% 
##        "64"          NA          NA      " 2.1"          NA          NA 
##     3P Made         3PA         3P%         FTM         FTA         FT% 
##          NA          NA     " 22.4"          NA      " 1.5"          NA 
##        OREB        DREB         REB         AST         STL         BLK 
##       "0.8"       "1.7"          NA          NA          NA          NA 
##         TOV TARGET_5Yrs 
##          NA          NA

Pendeteksian Missing Value

#Mengganti nilai NA dengan median (Median Imputation)
data.use$GP <- ifelse(is.na(data.use$GP), median(data.use$GP, na.rm = T),
                      data.use$GP)

data.use$MIN <- ifelse(is.na(data.use$MIN), median(data.use$MIN, na.rm = T),
                       data.use$MIN)

data.use$PTS <- ifelse(is.na(data.use$PTS), median(data.use$PTS, na.rm = T),
                       data.use$PTS)

data.use$FGM <- ifelse(is.na(data.use$FGM), median(data.use$FGM, na.rm = T),
                       data.use$FGM)

data.use$`3P Made` <- ifelse(is.na(data.use$`3P Made`), median(data.use$`3P Made`, na.rm = T),
                             data.use$`3P Made`)

data.use$`3P%` <- ifelse(is.na(data.use$`3P%`), median(data.use$`3P%`, na.rm = T),
                         data.use$`3P%`)

data.use$FTA <- ifelse(is.na(data.use$FTA), median(data.use$FTA, na.rm = T),
                       data.use$FTA)

data.use$OREB <- ifelse(is.na(data.use$OREB), median(data.use$OREB, na.rm = T),
                        data.use$OREB)

data.use$DREB <- ifelse(is.na(data.use$DREB), median(data.use$DREB, na.rm = T),
                        data.use$DREB)

data.use$TOV <- ifelse(is.na(data.use$TOV), median(data.use$TOV, na.rm = T),
                       data.use$TOV)

Penyimpanan Data

Data yang telah di-cleaning, lalu disimpan terlebih dahulu sebelum dilakukan tahap selanjutnya.

write_xlsx(data.use, "Data Aman.xlsx")

Deklarasi Cleaned Data

dataku <- read.csv("D:/MY COLLEGE/SEMESTER 6/TPM/DATA/Data Aman.csv")
tibble::tibble(dataku)
## # A tibble: 1,340 × 20
##       GP   MIN   PTS   FGM   FGA   FG. X3P.Made  X3PA  X3P.   FTM   FTA   FT.
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1    82  36.6   5.4   8.4  15.9  53.1      0     0     0     7.5  10.2  73.2
##  2    82  38.3  28.2  10.2  19.8  51.5      0.1   0.6  17.3   7.7   9.1  84.5
##  3    82  38     5.4   8.5  16.8  50.6      0.1   0.3  29.2   5.4   8.5  64.2
##  4    78  33.9   5.4   7.3  14.3  51.1      0     0     0     6.3   8.1  78.1
##  5    82  37.5  21.6   7.8  15.6  49.6      0.1   0.3  22.7   6     7.6  79.7
##  6    70  38.3  19.9   7.3  15.2  47.7      0.1   0.4  14.8   5.3   7.3  73.2
##  7    76  40.1  23.5   8.2  19.8  41.6      2     6    34.1   5     7.2  70.2
##  8    72  37.5  19.2   6.3  15.2  41.4      0.1   4.1  31.8   5.4   7.2  74.7
##  9    82  37.4  22.9   9    19.7  45.5      0.1   1.1  21.3   4.7   6.9  68.4
## 10    81  34.9   5.4   6.2  13.1  47.4      0     0.5  10     5.7   6.8  83.5
## # … with 1,330 more rows, and 8 more variables: OREB <dbl>, DREB <dbl>,
## #   REB <dbl>, AST <dbl>, STL <dbl>, BLK <dbl>, TOV <dbl>, TARGET_5Yrs <int>
str(dataku)
## 'data.frame':    1340 obs. of  20 variables:
##  $ GP         : num  82 82 82 78 82 70 76 72 82 81 ...
##  $ MIN        : num  36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
##  $ PTS        : num  5.4 28.2 5.4 5.4 21.6 19.9 23.5 19.2 22.9 5.4 ...
##  $ FGM        : num  8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
##  $ FGA        : num  15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
##  $ FG.        : num  53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
##  $ X3P.Made   : num  0 0.1 0.1 0 0.1 0.1 2 0.1 0.1 0 ...
##  $ X3PA       : num  0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
##  $ X3P.       : num  0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
##  $ FTM        : num  7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
##  $ FTA        : num  10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
##  $ FT.        : num  73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
##  $ OREB       : num  3.7 2 3.3 3.4 2.7 0.8 1.5 1.3 2.1 2.1 ...
##  $ DREB       : num  8.3 4.5 8.8 6.9 2.7 4.6 2.6 1.7 2.7 6.6 ...
##  $ REB        : num  12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
##  $ AST        : num  2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
##  $ STL        : num  1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
##  $ BLK        : num  3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
##  $ TOV        : num  3.1 0.9 2.7 3 0.9 2.9 4.4 0.9 0.9 3.4 ...
##  $ TARGET_5Yrs: int  1 1 1 1 1 1 1 1 1 1 ...

Pengubahan Format Data

dataku <- dataku %>% mutate_if(is.character, as.numeric)
str(dataku)
## 'data.frame':    1340 obs. of  20 variables:
##  $ GP         : num  82 82 82 78 82 70 76 72 82 81 ...
##  $ MIN        : num  36.6 38.3 38 33.9 37.5 38.3 40.1 37.5 37.4 34.9 ...
##  $ PTS        : num  5.4 28.2 5.4 5.4 21.6 19.9 23.5 19.2 22.9 5.4 ...
##  $ FGM        : num  8.4 10.2 8.5 7.3 7.8 7.3 8.2 6.3 9 6.2 ...
##  $ FGA        : num  15.9 19.8 16.8 14.3 15.6 15.2 19.8 15.2 19.7 13.1 ...
##  $ FG.        : num  53.1 51.5 50.6 51.1 49.6 47.7 41.6 41.4 45.5 47.4 ...
##  $ X3P.Made   : num  0 0.1 0.1 0 0.1 0.1 2 0.1 0.1 0 ...
##  $ X3PA       : num  0 0.6 0.3 0 0.3 0.4 6 4.1 1.1 0.5 ...
##  $ X3P.       : num  0 17.3 29.2 0 22.7 14.8 34.1 31.8 21.3 10 ...
##  $ FTM        : num  7.5 7.7 5.4 6.3 6 5.3 5 5.4 4.7 5.7 ...
##  $ FTA        : num  10.2 9.1 8.5 8.1 7.6 7.3 7.2 7.2 6.9 6.8 ...
##  $ FT.        : num  73.2 84.5 64.2 78.1 79.7 73.2 70.2 74.7 68.4 83.5 ...
##  $ OREB       : num  3.7 2 3.3 3.4 2.7 0.8 1.5 1.3 2.1 2.1 ...
##  $ DREB       : num  8.3 4.5 8.8 6.9 2.7 4.6 2.6 1.7 2.7 6.6 ...
##  $ REB        : num  12 6.5 12.1 10.3 5.4 6.4 4.1 3.7 4.8 8.7 ...
##  $ AST        : num  2 5.9 3.8 1 3.3 5 7.5 3.9 4.8 2.8 ...
##  $ STL        : num  1.7 2.4 0.8 0.3 1.1 1.8 2.1 1.1 2.5 1.3 ...
##  $ BLK        : num  3.9 0.8 0.6 3.5 0.2 0.9 0.3 1.1 1 1 ...
##  $ TOV        : num  3.1 0.9 2.7 3 0.9 2.9 4.4 0.9 0.9 3.4 ...
##  $ TARGET_5Yrs: int  1 1 1 1 1 1 1 1 1 1 ...

Splitting Data

set.seed(1704)
in.train <- caret::createDataPartition(as.factor(dataku$TARGET_5Yrs), p=0.8, list=FALSE)
dataku.train <- dataku[in.train,]
dataku.test <- dataku[-in.train,]

Pemodelan & Seleksi Peubah

dataku.glm0 <- glm(TARGET_5Yrs ~ ., family = binomial, dataku.train)
dataku.glm.step <- step(dataku.glm0)
## Start:  AIC=1248.76
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + FT. + OREB + DREB + REB + AST + STL + 
##     BLK + TOV
## 
##            Df Deviance    AIC
## - FT.       1   1208.8 1246.8
## - REB       1   1208.8 1246.8
## - DREB      1   1208.9 1246.9
## - TOV       1   1208.9 1246.9
## - FG.       1   1209.0 1247.0
## - STL       1   1209.0 1247.0
## - X3P.      1   1209.2 1247.2
## - X3P.Made  1   1209.5 1247.5
## - FGA       1   1209.6 1247.6
## - FGM       1   1209.6 1247.6
## - X3PA      1   1209.7 1247.7
## - MIN       1   1210.5 1248.5
## - PTS       1   1210.6 1248.6
## <none>          1208.8 1248.8
## - BLK       1   1211.2 1249.2
## - FTM       1   1212.8 1250.8
## - AST       1   1213.0 1251.0
## - OREB      1   1213.2 1251.2
## - FTA       1   1213.6 1251.6
## - GP        1   1246.8 1284.8
## 
## Step:  AIC=1246.78
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + DREB + REB + AST + STL + BLK + 
##     TOV
## 
##            Df Deviance    AIC
## - REB       1   1208.8 1244.8
## - DREB      1   1208.9 1244.9
## - TOV       1   1209.0 1245.0
## - FG.       1   1209.0 1245.0
## - STL       1   1209.0 1245.0
## - X3P.      1   1209.2 1245.2
## - X3P.Made  1   1209.5 1245.5
## - FGA       1   1209.7 1245.7
## - FGM       1   1209.7 1245.7
## - X3PA      1   1209.7 1245.7
## - MIN       1   1210.5 1246.5
## - PTS       1   1210.7 1246.7
## <none>          1208.8 1246.8
## - BLK       1   1211.2 1247.2
## - AST       1   1213.0 1249.0
## - OREB      1   1213.2 1249.2
## - FTM       1   1213.3 1249.3
## - FTA       1   1214.1 1250.1
## - GP        1   1246.8 1282.8
## 
## Step:  AIC=1244.81
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + DREB + AST + STL + BLK + TOV
## 
##            Df Deviance    AIC
## - TOV       1   1209.0 1243.0
## - FG.       1   1209.0 1243.0
## - STL       1   1209.0 1243.0
## - DREB      1   1209.0 1243.0
## - X3P.      1   1209.2 1243.2
## - X3P.Made  1   1209.6 1243.6
## - FGM       1   1209.7 1243.7
## - FGA       1   1209.7 1243.7
## - X3PA      1   1209.7 1243.7
## - MIN       1   1210.5 1244.5
## - PTS       1   1210.7 1244.7
## <none>          1208.8 1244.8
## - BLK       1   1211.3 1245.3
## - AST       1   1213.0 1247.0
## - FTM       1   1213.3 1247.3
## - FTA       1   1214.1 1248.1
## - OREB      1   1220.9 1254.9
## - GP        1   1247.0 1281.0
## 
## Step:  AIC=1242.98
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + DREB + AST + STL + BLK
## 
##            Df Deviance    AIC
## - DREB      1   1209.2 1241.2
## - STL       1   1209.2 1241.2
## - FG.       1   1209.2 1241.2
## - X3P.      1   1209.4 1241.4
## - FGM       1   1209.8 1241.8
## - FGA       1   1209.8 1241.8
## - X3P.Made  1   1210.0 1242.0
## - X3PA      1   1210.3 1242.3
## - MIN       1   1210.7 1242.7
## <none>          1209.0 1243.0
## - PTS       1   1211.0 1243.0
## - BLK       1   1211.5 1243.5
## - FTM       1   1213.4 1245.4
## - FTA       1   1214.1 1246.1
## - AST       1   1214.4 1246.4
## - OREB      1   1220.9 1252.9
## - GP        1   1248.4 1280.4
## 
## Step:  AIC=1241.15
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + AST + STL + BLK
## 
##            Df Deviance    AIC
## - STL       1   1209.3 1239.3
## - FG.       1   1209.4 1239.4
## - X3P.      1   1209.5 1239.5
## - FGM       1   1209.9 1239.9
## - FGA       1   1210.0 1240.0
## - X3P.Made  1   1210.1 1240.1
## - X3PA      1   1210.4 1240.4
## <none>          1209.2 1241.2
## - BLK       1   1211.5 1241.5
## - MIN       1   1211.7 1241.7
## - PTS       1   1211.7 1241.7
## - FTM       1   1213.7 1243.7
## - FTA       1   1214.5 1244.5
## - AST       1   1214.7 1244.7
## - OREB      1   1222.7 1252.7
## - GP        1   1248.4 1278.4
## 
## Step:  AIC=1239.35
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + FG. + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + AST + BLK
## 
##            Df Deviance    AIC
## - FG.       1   1209.5 1237.5
## - X3P.      1   1209.7 1237.7
## - FGM       1   1210.1 1238.1
## - FGA       1   1210.2 1238.2
## - X3P.Made  1   1210.3 1238.3
## - X3PA      1   1210.6 1238.6
## <none>          1209.3 1239.3
## - BLK       1   1211.7 1239.7
## - PTS       1   1211.8 1239.8
## - MIN       1   1212.1 1240.1
## - FTM       1   1214.0 1242.0
## - FTA       1   1214.8 1242.8
## - AST       1   1215.3 1243.3
## - OREB      1   1222.7 1250.7
## - GP        1   1248.5 1276.5
## 
## Step:  AIC=1237.54
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + X3P.Made + X3PA + 
##     X3P. + FTM + FTA + OREB + AST + BLK
## 
##            Df Deviance    AIC
## - X3P.      1   1209.9 1235.9
## - FGM       1   1210.2 1236.2
## - FGA       1   1210.2 1236.2
## - X3P.Made  1   1210.5 1236.5
## - X3PA      1   1210.8 1236.8
## <none>          1209.5 1237.5
## - BLK       1   1211.9 1237.9
## - PTS       1   1212.0 1238.0
## - MIN       1   1212.2 1238.2
## - FTM       1   1214.3 1240.3
## - FTA       1   1215.2 1241.2
## - AST       1   1215.4 1241.4
## - OREB      1   1222.7 1248.7
## - GP        1   1249.0 1275.0
## 
## Step:  AIC=1235.9
## TARGET_5Yrs ~ GP + MIN + PTS + FGM + FGA + X3P.Made + X3PA + 
##     FTM + FTA + OREB + AST + BLK
## 
##            Df Deviance    AIC
## - FGM       1   1210.6 1234.6
## - FGA       1   1210.6 1234.6
## - X3PA      1   1211.1 1235.1
## - X3P.Made  1   1211.1 1235.1
## <none>          1209.9 1235.9
## - BLK       1   1212.1 1236.1
## - PTS       1   1212.3 1236.3
## - MIN       1   1212.6 1236.6
## - FTM       1   1215.0 1239.0
## - AST       1   1215.8 1239.8
## - FTA       1   1215.9 1239.9
## - OREB      1   1222.8 1246.8
## - GP        1   1249.3 1273.3
## 
## Step:  AIC=1234.57
## TARGET_5Yrs ~ GP + MIN + PTS + FGA + X3P.Made + X3PA + FTM + 
##     FTA + OREB + AST + BLK
## 
##            Df Deviance    AIC
## - FGA       1   1210.6 1232.6
## - X3P.Made  1   1212.1 1234.1
## <none>          1210.6 1234.6
## - X3PA      1   1212.6 1234.6
## - BLK       1   1213.0 1235.0
## - MIN       1   1213.2 1235.2
## - PTS       1   1214.2 1236.2
## - FTM       1   1215.6 1237.6
## - AST       1   1216.4 1238.4
## - FTA       1   1216.4 1238.4
## - OREB      1   1224.7 1246.7
## - GP        1   1251.0 1273.0
## 
## Step:  AIC=1232.63
## TARGET_5Yrs ~ GP + MIN + PTS + X3P.Made + X3PA + FTM + FTA + 
##     OREB + AST + BLK
## 
##            Df Deviance    AIC
## - X3P.Made  1   1212.2 1232.2
## <none>          1210.6 1232.6
## - X3PA      1   1212.8 1232.8
## - BLK       1   1213.1 1233.1
## - MIN       1   1213.7 1233.7
## - FTM       1   1215.6 1235.6
## - PTS       1   1216.0 1236.0
## - AST       1   1216.4 1236.4
## - FTA       1   1216.5 1236.5
## - OREB      1   1224.7 1244.7
## - GP        1   1251.2 1271.2
## 
## Step:  AIC=1232.18
## TARGET_5Yrs ~ GP + MIN + PTS + X3PA + FTM + FTA + OREB + AST + 
##     BLK
## 
##        Df Deviance    AIC
## - X3PA  1   1212.8 1230.8
## <none>      1212.2 1232.2
## - BLK   1   1214.6 1232.6
## - MIN   1   1215.0 1233.0
## - AST   1   1217.0 1235.0
## - FTM   1   1217.1 1235.1
## - PTS   1   1217.4 1235.4
## - FTA   1   1218.2 1236.2
## - OREB  1   1225.9 1243.9
## - GP    1   1254.7 1272.7
## 
## Step:  AIC=1230.76
## TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK
## 
##        Df Deviance    AIC
## <none>      1212.8 1230.8
## - BLK   1   1215.2 1231.2
## - MIN   1   1217.1 1233.1
## - PTS   1   1217.4 1233.4
## - FTM   1   1217.6 1233.6
## - AST   1   1218.0 1234.0
## - FTA   1   1218.4 1234.4
## - OREB  1   1231.8 1247.8
## - GP    1   1257.5 1273.5
summary(dataku.glm.step)
## 
## Call:
## glm(formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + 
##     AST + BLK, family = binomial, data = dataku.train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5155  -1.0281   0.5267   0.9067   2.2067  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.301880   0.285271  -8.069 7.08e-16 ***
## GP           0.033123   0.005066   6.539 6.20e-11 ***
## MIN         -0.046123   0.022075  -2.089   0.0367 *  
## PTS          0.099187   0.044762   2.216   0.0267 *  
## FTM          0.914006   0.429623   2.127   0.0334 *  
## FTA         -0.731323   0.315856  -2.315   0.0206 *  
## OREB         0.789246   0.188015   4.198 2.70e-05 ***
## AST          0.194997   0.087549   2.227   0.0259 *  
## BLK          0.420820   0.274213   1.535   0.1249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1425.3  on 1072  degrees of freedom
## Residual deviance: 1212.8  on 1064  degrees of freedom
## AIC: 1230.8
## 
## Number of Fisher Scoring iterations: 5
dataku.glm.final <- glm(formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK, family = binomial, data = dataku.train)

summary(dataku.glm.final)
## 
## Call:
## glm(formula = TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + 
##     AST + BLK, family = binomial, data = dataku.train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5155  -1.0281   0.5267   0.9067   2.2067  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.301880   0.285271  -8.069 7.08e-16 ***
## GP           0.033123   0.005066   6.539 6.20e-11 ***
## MIN         -0.046123   0.022075  -2.089   0.0367 *  
## PTS          0.099187   0.044762   2.216   0.0267 *  
## FTM          0.914006   0.429623   2.127   0.0334 *  
## FTA         -0.731323   0.315856  -2.315   0.0206 *  
## OREB         0.789246   0.188015   4.198 2.70e-05 ***
## AST          0.194997   0.087549   2.227   0.0259 *  
## BLK          0.420820   0.274213   1.535   0.1249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1425.3  on 1072  degrees of freedom
## Residual deviance: 1212.8  on 1064  degrees of freedom
## AIC: 1230.8
## 
## Number of Fisher Scoring iterations: 5

Interpretasi Koefisien Model

exp(dataku.glm.final$coefficients)
## (Intercept)          GP         MIN         PTS         FTM         FTA 
##   0.1000705   1.0336781   0.9549247   1.1042732   2.4942937   0.4812719 
##        OREB         AST         BLK 
##   2.2017353   1.2153074   1.5232105

Uji Parameter Model

#Model tanpa peubah x
modeltanpax <- glm(formula = TARGET_5Yrs ~ 1, family = binomial, data = dataku.train)

Uji Deviance (simultan)

#Beberapa alternatif perhitungan statistik uji G2 pada R
modeltanpax$deviance-dataku.glm.final$deviance
## [1] 212.5711
1-pchisq(dataku.glm.final$null.deviance-dataku.glm.final$deviance, dataku.glm.final$df.null-dataku.glm.final$df.residual)
## [1] 0
qchisq(1-0.05, df=dataku.glm.final$df.null-dataku.glm.final$df.residual)
## [1] 15.50731
dataku.glm.final$null.deviance-dataku.glm.final$deviance
## [1] 212.5711
(-2*logLik(modeltanpax))-(-2*logLik(dataku.glm.final))
## 'log Lik.' 212.5711 (df=1)

Uji Rasio Likeihood (pembandingan)

lrtest(modeltanpax, dataku.glm.final)
## Likelihood ratio test
## 
## Model 1: TARGET_5Yrs ~ 1
## Model 2: TARGET_5Yrs ~ GP + MIN + PTS + FTM + FTA + OREB + AST + BLK
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   1 -712.67                         
## 2   9 -606.38  8 212.57  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Uji Wald (Parsial)

Anova(dataku.glm.final,type='II',test='Wald')
## Analysis of Deviance Table (Type II tests)
## 
## Response: TARGET_5Yrs
##      Df   Chisq Pr(>Chisq)    
## GP    1 42.7561  6.201e-11 ***
## MIN   1  4.3654    0.03668 *  
## PTS   1  4.9102    0.02670 *  
## FTM   1  4.5261    0.03338 *  
## FTA   1  5.3609    0.02059 *  
## OREB  1 17.6215  2.695e-05 ***
## AST   1  4.9608    0.02593 *  
## BLK   1  2.3551    0.12487    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Uji Kecocokan Model

#Hosmer Lemeshow Test
hoslem.test(dataku.train$TARGET_5Yrs,dataku.glm.final$fitted.values)
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  dataku.train$TARGET_5Yrs, dataku.glm.final$fitted.values
## X-squared = 6.006, df = 8, p-value = 0.6466

Evaluasi Model

fit.final <- fitted.values(dataku.glm.final)
pred.final <- ifelse(fit.final>=0.5,"GOOD","BAD")
tab <- table(dataku.train$TARGET_5Yrs, pred.final, dnn = c("Truth", "Predicted"))
tab
##      Predicted
## Truth BAD GOOD
##     0 216  192
##     1 117  548

Akurasi

akurasi <- sum(diag(tab))/sum(tab)
akurasi
## [1] 0.7120224

Confusion Matrix Y Aktual & Prediksi

pdata=predict(dataku.glm.final,newdata=dataku.train,type="response")
y_prediksi<-ifelse(pdata<0.5,"bad","good")
y_aktual<-dataku.train$TARGET_5Yrs
klf=table(y_aktual,y_prediksi)
accuracy=(klf[1,1]+klf[2,2])/sum(klf)*100
sensitivity= klf[2,2]/sum(klf[2,])*100
specificity= klf[1,1]/sum(klf[1,])*100 
fprate=klf[2,1]/(klf[2,1]+klf[1,1])*100
AUC=(100+sensitivity-fprate)/2 
performa=data.frame(accuracy,sensitivity,specificity,AUC)

klf
##         y_prediksi
## y_aktual bad good
##        0 216  192
##        1 117  548
performa
##   accuracy sensitivity specificity      AUC
## 1 71.20224    82.40602    52.94118 73.63544

ROC plot

pred<-
prediction(predict.glm(dataku.glm.final,dataku.test),dataku.test$TARGET_5Yrs)
perf <- performance(pred,"tpr","fpr")
plot(perf)