Logistic Regression

Dataset

The dataset used is social network ads data. The goal is to examine the effectiveness of advertising through social media.

* The response variable (Y) is the purchase status of the advertised product (Purchased: (1) bought, (0) did not buy).
* The explanatory variables (X) describe the consumer profile:
  1. Sex (Gender: Male-Female)
  2. Age (Age, in years)
  3. Income (EstimatedSalary, in thousands of rupiah)

# Read the data from an Excel file
library(readxl)
Mydt <- read_xlsx("D:/Bahan Ajar/Social_Network_Ads.xlsx")
head(Mydt)
## # A tibble: 6 x 5
##   `User ID` Gender   Age EstimatedSalary Purchased
##       <dbl> <chr>  <dbl>           <dbl>     <dbl>
## 1  15624510 Male      19            2100         0
## 2  15810944 Male      35            2200         0
## 3  15668575 Female    26            4800         0
## 4  15603246 Female    27            6300         0
## 5  15804002 Male      19            8400         0
## 6  15728773 Male      27            6400         0

Data Description

# 1. Summary of the numeric variables (Age & EstimatedSalary)
summary(Mydt[,3:4])
##       Age        EstimatedSalary
##  Min.   :18.00   Min.   : 1700  
##  1st Qu.:29.75   1st Qu.: 4800  
##  Median :37.00   Median : 7800  
##  Mean   :37.66   Mean   : 7754  
##  3rd Qu.:46.00   3rd Qu.: 9800  
##  Max.   :60.00   Max.   :16700
sapply(Mydt[3:4], sd)
##             Age EstimatedSalary 
##        10.48288      3788.55709
# 2. Tabulation of Gender
table(Mydt$Gender)
## 
## Female   Male 
##    204    196
table(Mydt$Gender)/nrow(Mydt)*100
## 
## Female   Male 
##     51     49
# 3. Tabulation of Purchased
table(Mydt$Purchased)
## 
##   0   1 
## 257 143
table(Mydt$Purchased)/nrow(Mydt)*100
## 
##     0     1 
## 64.25 35.75
# 4. Cross-tabulation of Gender and Purchased
table(Mydt$Gender, Mydt$Purchased)
##         
##            0   1
##   Female 127  77
##   Male   130  66
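
Row percentages make the cross-tabulation easier to compare across genders; a minimal sketch using prop.table (its output is not part of the original):

# 5. Row percentages: share of buyers within each gender
prop.table(table(Mydt$Gender, Mydt$Purchased), margin = 1) * 100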

Boxplot

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.4
ggplot(Mydt, aes(x=as.factor(Purchased), y=Age)) + 
  geom_boxplot(fill = c("#AFEEEE","#F5DEB3"), outlier.colour = "darkred") +
  xlab("Purchased") +
  ylab("Age")+
  theme_minimal()+
  theme(legend.position="none")

ggplot(Mydt, aes(x=as.factor(Purchased), y=EstimatedSalary)) + 
  geom_boxplot(fill = c("#E9967A","#8FBC8F"), outlier.colour = "darkred") +
  xlab("Purchased") +
  ylab("Estimated Salary")+
  theme_minimal()+
  theme(legend.position="none")

# Summary of Age & EstimatedSalary for each Purchased group
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
describeBy(Mydt[,3:4], Mydt$Purchased)
## 
##  Descriptive statistics by group 
## group: 0
##                 vars   n    mean      sd median trimmed     mad  min   max
## Age                1 257   32.79    7.99     34   32.71    8.90   18    59
## EstimatedSalary    2 257 6731.91 2706.03   6800 6726.09 2816.94 1700 15700
##                 range skew kurtosis    se
## Age                41 0.21    -0.17   0.5
## EstimatedSalary 14000 0.18     0.19 168.8
## ------------------------------------------------------------ 
## group: 1
##                 vars   n    mean      sd median trimmed     mad  min   max
## Age                1 143   46.39    8.61     47   46.73   10.38   27    60
## EstimatedSalary    2 143 9591.61 4673.09  10000 9631.30 6819.96 2200 16700
##                 range  skew kurtosis     se
## Age                33 -0.28    -0.76   0.72
## EstimatedSalary 14500 -0.15    -1.37 390.78

Data Preparation

Check for imbalance in the response variable (Y) [Purchased]

table(Mydt$Purchased)
## 
##   0   1 
## 257 143

The No-Purchase (0) and Purchase (1) categories clearly have unbalanced proportions. The split below therefore draws the same number of rows (90% of the minority class size) from each class to build a balanced training set.

Splitting into training and test data

# Separate the observations by response class
input0 <- Mydt[which(Mydt$Purchased == 0),]   # did not buy
input1 <- Mydt[which(Mydt$Purchased == 1),]   # bought

set.seed(11)
# Sample the same number of training rows (90% of the minority class) from each class
input0_training = sample(1:nrow(input0), 0.9 * nrow(input1))
input1_training = sample(1:nrow(input1), 0.9 * nrow(input1))

training0 = input0[input0_training,]
training1 = input1[input1_training,]
Tr.dt = rbind(training1, training0)   # balanced training set

test0 = input0[-input0_training,]
test1 = input1[-input1_training,]
Test.dt = rbind(test1, test0)         # remaining rows form the test set
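
As a quick sanity check (not part of the original output), the class counts in the two sets can be inspected; a minimal sketch:

# Class counts after the split (the training set should be balanced)
table(Tr.dt$Purchased)
table(Test.dt$Purchased)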

Logistic Regression

Model 1 with all explanatory variables included. Note that the call below omits the family argument, so glm() falls back to its gaussian default; the summary therefore reports a linear (gaussian) fit rather than a logistic one, as the dispersion line shows.

logit.Mod = glm(Tr.dt$Purchased ~ Gender + Age + EstimatedSalary, data = Tr.dt)
summary(logit.Mod)
## 
## Call:
## glm(formula = Tr.dt$Purchased ~ Gender + Age + EstimatedSalary, 
##     data = Tr.dt)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.05404  -0.28402  -0.00201   0.31895   0.76902  
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -8.979e-01  1.044e-01  -8.602 8.47e-16 ***
## GenderMale       6.221e-03  4.684e-02   0.133    0.894    
## Age              2.742e-02  2.183e-03  12.562  < 2e-16 ***
## EstimatedSalary  3.631e-05  5.788e-06   6.274 1.53e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1388133)
## 
##     Null deviance: 64.000  on 255  degrees of freedom
## Residual deviance: 34.981  on 252  degrees of freedom
## AIC: 226.96
## 
## Number of Fisher Scoring iterations: 2
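
If Model 1 itself were intended as a logistic fit, the same formula could be refitted with the binomial family; a minimal sketch (the object name logit.Mod1b is hypothetical and this refit is not part of the original output):

# Hypothetical refit of Model 1 as an actual logistic regression
logit.Mod1b <- glm(Purchased ~ Gender + Age + EstimatedSalary,
                   data = Tr.dt, family = binomial(link = "logit"))
summary(logit.Mod1b)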

Model 2, dropping the non-significant explanatory variable (Gender) and explicitly specifying the binomial family with the logit link

logit.Mod2 <- glm(Tr.dt$Purchased ~ Age + EstimatedSalary, data = Tr.dt, family = binomial(link = "logit"))
summary(logit.Mod2)
## 
## Call:
## glm(formula = Tr.dt$Purchased ~ Age + EstimatedSalary, family = binomial(link = "logit"), 
##     data = Tr.dt)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -3.00602  -0.55171  -0.00295   0.62681   2.06964  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.120e+01  1.476e+00  -7.586 3.30e-14 ***
## Age              2.179e-01  2.946e-02   7.398 1.38e-13 ***
## EstimatedSalary  3.081e-04  5.558e-05   5.544 2.96e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 354.89  on 255  degrees of freedom
## Residual deviance: 199.48  on 253  degrees of freedom
## AIC: 205.48
## 
## Number of Fisher Scoring iterations: 6
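
Model 2 converts a consumer profile into a purchase probability through the inverse logit, p = 1 / (1 + exp(-(b0 + b1*Age + b2*EstimatedSalary))). A minimal sketch of this calculation (the consumer with Age 40 and EstimatedSalary 10000 is a made-up example):

# Manual predicted probability for a hypothetical consumer (Age = 40, EstimatedSalary = 10000)
b   <- coef(logit.Mod2)
eta <- b["(Intercept)"] + b["Age"] * 40 + b["EstimatedSalary"] * 10000
1 / (1 + exp(-eta))   # inverse logit by hand
plogis(eta)           # the same, via the built-in logistic CDF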

Computing the Odds Ratios

beta = coef(logit.Mod)
OR = exp(beta)
cbind(beta, OR)
##                          beta        OR
## (Intercept)     -8.979444e-01 0.4074063
## GenderMale       6.220556e-03 1.0062399
## Age              2.742266e-02 1.0278021
## EstimatedSalary  3.630957e-05 1.0000363
beta = coef(logit.Mod2)
OR = exp(beta)
cbind(beta, OR)
##                          beta           OR
## (Intercept)     -1.119715e+01 1.371321e-05
## Age              2.179059e-01 1.243470e+00
## EstimatedSalary  3.081256e-04 1.000308e+00
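
Only the exponentiated coefficients of Model 2 carry an odds-ratio interpretation, since Model 1 was fitted with the gaussian default; for Model 2, the odds ratio of about 1.24 for Age means each additional year of age multiplies the odds of buying by roughly 1.24, holding salary fixed. Wald confidence intervals for these odds ratios can be obtained by exponentiating the coefficient intervals; a minimal sketch:

# 95% Wald confidence intervals for the odds ratios of Model 2
exp(confint.default(logit.Mod2))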

Computing Predictions

logit.Mod2$data
## # A tibble: 256 x 5
##    `User ID` Gender   Age EstimatedSalary Purchased
##        <dbl> <chr>  <dbl>           <dbl>     <dbl>
##  1  15624755 Female    54            2900         1
##  2  15600379 Male      48           10000         1
##  3  15577514 Male      43           14300         1
##  4  15694879 Male      37           16000         1
##  5  15764604 Male      49            9800         1
##  6  15722061 Female    51           16200         1
##  7  15694288 Female    32           13000         1
##  8  15746203 Female    47           16000         1
##  9  15774872 Female    52           15300         1
## 10  15646936 Male      53            8000         1
## # ... with 246 more rows
logit.Mod2$fitted.values
##           1           2           3           4           5           6 
## 0.812041525 0.912423125 0.929497602 0.857585094 0.924132539 0.992664617 
##           7           8           9          10          11          12 
## 0.445583774 0.981556626 0.992219131 0.943580406 0.992552462 0.342564816 
##          13          14          15          16          17          18 
## 0.939090186 0.954699507 0.592912573 0.738106768 0.663673668 0.468518510 
##          19          20          21          22          23          24 
## 0.717206134 0.490478398 0.904003045 0.961402675 0.880119141 0.613967571 
##          25          26          27          28          29          30 
## 0.964012247 0.117455601 0.863674432 0.981596735 0.515305092 0.607168633 
##          31          32          33          34          35          36 
## 0.599263096 0.431018071 0.892549151 0.869011787 0.599263096 0.342564816 
##          37          38          39          40          41          42 
## 0.673884698 0.979232956 0.765985192 0.671006134 0.509209431 0.690132949 
##          43          44          45          46          47          48 
## 0.957190687 0.984388658 0.839215526 0.986846266 0.717157670 0.869011787 
##          49          50          51          52          53          54 
## 0.994646733 0.607168633 0.937679606 0.993760634 0.667569663 0.671984652 
##          55          56          57          58          59          60 
## 0.509763723 0.508100782 0.294325862 0.439623769 0.464713863 0.483948342 
##          61          62          63          64          65          66 
## 0.828845890 0.670026130 0.664609837 0.951346094 0.995565923 0.645226498 
##          67          68          69          70          71          72 
## 0.977267495 0.998583986 0.347468924 0.683505397 0.561766108 0.995316526 
##          73          74          75          76          77          78 
## 0.388424145 0.537241215 0.919697385 0.954507273 0.915866690 0.959136777 
##          79          80          81          82          83          84 
## 0.715354618 0.560673752 0.985902369 0.445642802 0.543732699 0.821882285 
##          85          86          87          88          89          90 
## 0.598139922 0.723365737 0.856799818 0.996188208 0.553618609 0.610808730 
##          91          92          93          94          95          96 
## 0.864946990 0.997753884 0.806602282 0.724252521 0.691080769 0.270264634 
##          97          98          99         100         101         102 
## 0.926264822 0.378052526 0.989847539 0.933720562 0.991092877 0.965289699 
##         103         104         105         106         107         108 
## 0.990796510 0.699021634 0.969190616 0.994931615 0.569336689 0.395768362 
##         109         110         111         112         113         114 
## 0.824751226 0.816331208 0.399961924 0.309177138 0.955638996 0.996221749 
##         115         116         117         118         119         120 
## 0.387370910 0.991053632 0.820941248 0.944388724 0.986173883 0.994785362 
##         121         122         123         124         125         126 
## 0.955628866 0.958347200 0.461464468 0.998293153 0.823177345 0.718505410 
##         127         128         129         130         131         132 
## 0.965289699 0.751873161 0.449422404 0.030326456 0.845370240 0.006626678 
##         133         134         135         136         137         138 
## 0.243008918 0.158270027 0.003125502 0.004120232 0.044695481 0.093103376 
##         139         140         141         142         143         144 
## 0.983695941 0.267260353 0.108421371 0.989090155 0.550327622 0.359579124 
##         145         146         147         148         149         150 
## 0.001746749 0.248721847 0.305450894 0.434770916 0.059004815 0.445583774 
##         151         152         153         154         155         156 
## 0.339574025 0.542691289 0.233039997 0.009437705 0.059485971 0.297560796 
##         157         158         159         160         161         162 
## 0.034168383 0.365675413 0.155660226 0.543241685 0.938949500 0.020212628 
##         163         164         165         166         167         168 
## 0.266392553 0.135425706 0.064873149 0.352514672 0.456507669 0.023101843 
##         169         170         171         172         173         174 
## 0.172871547 0.025828933 0.098635815 0.034168383 0.059610182 0.089444350 
##         175         176         177         178         179         180 
## 0.015359249 0.535586566 0.181531749 0.079575807 0.293454908 0.006847692 
##         181         182         183         184         185         186 
## 0.043031331 0.223359362 0.036572812 0.036416832 0.049330203 0.015426480 
##         187         188         189         190         191         192 
## 0.055674741 0.118378381 0.018067525 0.032330550 0.017376782 0.095354520 
##         193         194         195         196         197         198 
## 0.434225941 0.331714334 0.114748873 0.232643810 0.096875251 0.019152209 
##         199         200         201         202         203         204 
## 0.338579917 0.383225049 0.011307663 0.211369059 0.617584348 0.021196979 
##         205         206         207         208         209         210 
## 0.006641295 0.097069475 0.032882698 0.443452123 0.095737860 0.487766968 
##         211         212         213         214         215         216 
## 0.073515005 0.503170764 0.451069404 0.403159901 0.017080402 0.711776858 
##         217         218         219         220         221         222 
## 0.090351672 0.660696784 0.988948226 0.836232927 0.009859224 0.128371022 
##         223         224         225         226         227         228 
## 0.017490762 0.383225049 0.881516151 0.368251585 0.403159901 0.021335473 
##         229         230         231         232         233         234 
## 0.186154817 0.065007831 0.163325467 0.007060458 0.007152672 0.326868606 
##         235         236         237         238         239         240 
## 0.206278522 0.013166316 0.957200476 0.261698006 0.005324346 0.451069404 
##         241         242         243         244         245         246 
## 0.136467836 0.044695481 0.019403807 0.016715924 0.450520283 0.033166008 
##         247         248         249         250         251         252 
## 0.519122246 0.531722733 0.094212729 0.893162830 0.222590806 0.073969493 
##         253         254         255         256 
## 0.442904793 0.232248092 0.012997589 0.221824140
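
The fitted values above are in-sample probabilities for the training data. To gauge performance on the held-out data, one might predict on Test.dt and classify with a cutoff; a minimal sketch (the 0.5 threshold and the confusion matrix are assumptions, not part of the original analysis):

# Hypothetical out-of-sample evaluation on the test set
pred.prob  <- predict(logit.Mod2, newdata = Test.dt, type = "response")
pred.class <- ifelse(pred.prob > 0.5, 1, 0)   # assumed 0.5 cutoff
table(Actual = Test.dt$Purchased, Predicted = pred.class)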