Regresi Logistik
Dataset
Dataset yang akan digunakan adalah data social network ads. Selanjutnya ingiin mengkaji keefektifan iklan lewat media sosial. * Peubah respon (Y) adalah status pembelian produk yang diiklankan (Purchased: (1) membeli, (0) tidak membeli) * Peubah penjelas (X) merupakan profil konsumen diantaranya: 1. Jenis kelamin (Gender: Male-Female) 2. Umur (Umur dalam tahun) 3. Pendapatan (EstimatedSalary dalam ribu rupiah)
#Untuk membaca data dari exel
library(readxl)
Mydt <- read_xlsx("D:/Bahan Ajar/Social_Network_Ads.xlsx")
head(Mydt)## # A tibble: 6 x 5
## `User ID` Gender Age EstimatedSalary Purchased
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 15624510 Male 19 2100 0
## 2 15810944 Male 35 2200 0
## 3 15668575 Female 26 4800 0
## 4 15603246 Female 27 6300 0
## 5 15804002 Male 19 8400 0
## 6 15728773 Male 27 6400 0
Deskripsi Data
# 1.(Age&Salary)
summary(Mydt[,3:4])## Age EstimatedSalary
## Min. :18.00 Min. : 1700
## 1st Qu.:29.75 1st Qu.: 4800
## Median :37.00 Median : 7800
## Mean :37.66 Mean : 7754
## 3rd Qu.:46.00 3rd Qu.: 9800
## Max. :60.00 Max. :16700
sapply(Mydt[3:4], sd)## Age EstimatedSalary
## 10.48288 3788.55709
# 2.Tabulasi Gender
table(Mydt$Gender)##
## Female Male
## 204 196
table(Mydt$Gender)/nrow(Mydt)*100##
## Female Male
## 51 49
# 3. Tab Purchased
table(Mydt$Purchased)##
## 0 1
## 257 143
table(Mydt$Purchased)/nrow(Mydt)*100##
## 0 1
## 64.25 35.75
# 4. crosstab Gender, Purchased
table(Mydt$Gender, Mydt$Purchased)##
## 0 1
## Female 127 77
## Male 130 66
Barplot
library(ggplot2)## Warning: package 'ggplot2' was built under R version 4.0.4
ggplot(Mydt, aes(x=as.factor(Purchased), y=Age)) +
geom_boxplot(fill = c("#AFEEEE","#F5DEB3"), outlier.colour = "darkred") +
xlab("Purchased") +
ylab("Age")+
theme_minimal()+
theme(legend.position="none")library(ggplot2)
ggplot(Mydt, aes(x=as.factor(Purchased), y=Age)) +
geom_boxplot(fill = c("#AFEEEE","#F5DEB3"), outlier.colour = "darkred") +
xlab("Purchased") +
ylab("Age")+
theme_minimal()+
theme(legend.position="none")ggplot(Mydt, aes(x=as.factor(Purchased), y=EstimatedSalary)) +
geom_boxplot(fill = c("#E9967A","#8FBC8F"), outlier.colour = "darkred") +
xlab("Purchased") +
ylab("Estimated Salary")+
theme_minimal()+
theme(legend.position="none")# Summary Age & Salary for each Purchased
library(psych)##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
describeBy(Mydt[,3:4], Mydt$Purchased)##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max
## Age 1 257 32.79 7.99 34 32.71 8.90 18 59
## EstimatedSalary 2 257 6731.91 2706.03 6800 6726.09 2816.94 1700 15700
## range skew kurtosis se
## Age 41 0.21 -0.17 0.5
## EstimatedSalary 14000 0.18 0.19 168.8
## ------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max
## Age 1 143 46.39 8.61 47 46.73 10.38 27 60
## EstimatedSalary 2 143 9591.61 4673.09 10000 9631.30 6819.96 2200 16700
## range skew kurtosis se
## Age 33 -0.28 -0.76 0.72
## EstimatedSalary 14500 -0.15 -1.37 390.78
Persiapan Data
Cek bias peubah respon (Y) [Purchased]
table(Mydt$Purchased)##
## 0 1
## 257 143
Terlihat antara kategori Tidak Membeli (0) dan Membeli (1) memiliki proporsi data yang tidak seimbang.
Pembagian data latih dan data uji
input0 <- Mydt[which(Mydt$Purchased == 0),]
input1 <- Mydt[which(Mydt$Purchased == 1),]
set.seed(11)
input0_training = sample(1:nrow(input0), 0.9 * nrow(input1))
input1_training = sample(1:nrow(input1), 0.9*nrow(input1))
training0 = input0[input0_training,]
training1 = input1[input1_training,]
Tr.dt = rbind(training1,training0)
test0 = input0[-input0_training,]
test1 = input1[-input1_training,]
Test.dt = rbind(test1,test0)Regresi Logistik
Model 1 dengan semua peubah dimasukkan
logit.Mod = glm(Tr.dt$Purchased ~ Gender + Age + EstimatedSalary, data = Tr.dt)
summary(logit.Mod)##
## Call:
## glm(formula = Tr.dt$Purchased ~ Gender + Age + EstimatedSalary,
## data = Tr.dt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.05404 -0.28402 -0.00201 0.31895 0.76902
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.979e-01 1.044e-01 -8.602 8.47e-16 ***
## GenderMale 6.221e-03 4.684e-02 0.133 0.894
## Age 2.742e-02 2.183e-03 12.562 < 2e-16 ***
## EstimatedSalary 3.631e-05 5.788e-06 6.274 1.53e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1388133)
##
## Null deviance: 64.000 on 255 degrees of freedom
## Residual deviance: 34.981 on 252 degrees of freedom
## AIC: 226.96
##
## Number of Fisher Scoring iterations: 2
Model 2 dengan menghilangkan peubah penjelas yang tidak signifikan
logit.Mod2 <- glm(Tr.dt$Purchased ~ Age + EstimatedSalary, data = Tr.dt, family = binomial(link = "logit"))
summary(logit.Mod2)##
## Call:
## glm(formula = Tr.dt$Purchased ~ Age + EstimatedSalary, family = binomial(link = "logit"),
## data = Tr.dt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.00602 -0.55171 -0.00295 0.62681 2.06964
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.120e+01 1.476e+00 -7.586 3.30e-14 ***
## Age 2.179e-01 2.946e-02 7.398 1.38e-13 ***
## EstimatedSalary 3.081e-04 5.558e-05 5.544 2.96e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 354.89 on 255 degrees of freedom
## Residual deviance: 199.48 on 253 degrees of freedom
## AIC: 205.48
##
## Number of Fisher Scoring iterations: 6
Menghitung Nilai Odd Ratio
beta = coef(logit.Mod)
OR = exp(beta)
cbind(beta, OR)## beta OR
## (Intercept) -8.979444e-01 0.4074063
## GenderMale 6.220556e-03 1.0062399
## Age 2.742266e-02 1.0278021
## EstimatedSalary 3.630957e-05 1.0000363
beta = coef(logit.Mod2)
OR = exp(beta)
cbind(beta, OR)## beta OR
## (Intercept) -1.119715e+01 1.371321e-05
## Age 2.179059e-01 1.243470e+00
## EstimatedSalary 3.081256e-04 1.000308e+00
Menghitung Prediksi
logit.Mod2$data## # A tibble: 256 x 5
## `User ID` Gender Age EstimatedSalary Purchased
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 15624755 Female 54 2900 1
## 2 15600379 Male 48 10000 1
## 3 15577514 Male 43 14300 1
## 4 15694879 Male 37 16000 1
## 5 15764604 Male 49 9800 1
## 6 15722061 Female 51 16200 1
## 7 15694288 Female 32 13000 1
## 8 15746203 Female 47 16000 1
## 9 15774872 Female 52 15300 1
## 10 15646936 Male 53 8000 1
## # ... with 246 more rows
logit.Mod2$fitted.values## 1 2 3 4 5 6
## 0.812041525 0.912423125 0.929497602 0.857585094 0.924132539 0.992664617
## 7 8 9 10 11 12
## 0.445583774 0.981556626 0.992219131 0.943580406 0.992552462 0.342564816
## 13 14 15 16 17 18
## 0.939090186 0.954699507 0.592912573 0.738106768 0.663673668 0.468518510
## 19 20 21 22 23 24
## 0.717206134 0.490478398 0.904003045 0.961402675 0.880119141 0.613967571
## 25 26 27 28 29 30
## 0.964012247 0.117455601 0.863674432 0.981596735 0.515305092 0.607168633
## 31 32 33 34 35 36
## 0.599263096 0.431018071 0.892549151 0.869011787 0.599263096 0.342564816
## 37 38 39 40 41 42
## 0.673884698 0.979232956 0.765985192 0.671006134 0.509209431 0.690132949
## 43 44 45 46 47 48
## 0.957190687 0.984388658 0.839215526 0.986846266 0.717157670 0.869011787
## 49 50 51 52 53 54
## 0.994646733 0.607168633 0.937679606 0.993760634 0.667569663 0.671984652
## 55 56 57 58 59 60
## 0.509763723 0.508100782 0.294325862 0.439623769 0.464713863 0.483948342
## 61 62 63 64 65 66
## 0.828845890 0.670026130 0.664609837 0.951346094 0.995565923 0.645226498
## 67 68 69 70 71 72
## 0.977267495 0.998583986 0.347468924 0.683505397 0.561766108 0.995316526
## 73 74 75 76 77 78
## 0.388424145 0.537241215 0.919697385 0.954507273 0.915866690 0.959136777
## 79 80 81 82 83 84
## 0.715354618 0.560673752 0.985902369 0.445642802 0.543732699 0.821882285
## 85 86 87 88 89 90
## 0.598139922 0.723365737 0.856799818 0.996188208 0.553618609 0.610808730
## 91 92 93 94 95 96
## 0.864946990 0.997753884 0.806602282 0.724252521 0.691080769 0.270264634
## 97 98 99 100 101 102
## 0.926264822 0.378052526 0.989847539 0.933720562 0.991092877 0.965289699
## 103 104 105 106 107 108
## 0.990796510 0.699021634 0.969190616 0.994931615 0.569336689 0.395768362
## 109 110 111 112 113 114
## 0.824751226 0.816331208 0.399961924 0.309177138 0.955638996 0.996221749
## 115 116 117 118 119 120
## 0.387370910 0.991053632 0.820941248 0.944388724 0.986173883 0.994785362
## 121 122 123 124 125 126
## 0.955628866 0.958347200 0.461464468 0.998293153 0.823177345 0.718505410
## 127 128 129 130 131 132
## 0.965289699 0.751873161 0.449422404 0.030326456 0.845370240 0.006626678
## 133 134 135 136 137 138
## 0.243008918 0.158270027 0.003125502 0.004120232 0.044695481 0.093103376
## 139 140 141 142 143 144
## 0.983695941 0.267260353 0.108421371 0.989090155 0.550327622 0.359579124
## 145 146 147 148 149 150
## 0.001746749 0.248721847 0.305450894 0.434770916 0.059004815 0.445583774
## 151 152 153 154 155 156
## 0.339574025 0.542691289 0.233039997 0.009437705 0.059485971 0.297560796
## 157 158 159 160 161 162
## 0.034168383 0.365675413 0.155660226 0.543241685 0.938949500 0.020212628
## 163 164 165 166 167 168
## 0.266392553 0.135425706 0.064873149 0.352514672 0.456507669 0.023101843
## 169 170 171 172 173 174
## 0.172871547 0.025828933 0.098635815 0.034168383 0.059610182 0.089444350
## 175 176 177 178 179 180
## 0.015359249 0.535586566 0.181531749 0.079575807 0.293454908 0.006847692
## 181 182 183 184 185 186
## 0.043031331 0.223359362 0.036572812 0.036416832 0.049330203 0.015426480
## 187 188 189 190 191 192
## 0.055674741 0.118378381 0.018067525 0.032330550 0.017376782 0.095354520
## 193 194 195 196 197 198
## 0.434225941 0.331714334 0.114748873 0.232643810 0.096875251 0.019152209
## 199 200 201 202 203 204
## 0.338579917 0.383225049 0.011307663 0.211369059 0.617584348 0.021196979
## 205 206 207 208 209 210
## 0.006641295 0.097069475 0.032882698 0.443452123 0.095737860 0.487766968
## 211 212 213 214 215 216
## 0.073515005 0.503170764 0.451069404 0.403159901 0.017080402 0.711776858
## 217 218 219 220 221 222
## 0.090351672 0.660696784 0.988948226 0.836232927 0.009859224 0.128371022
## 223 224 225 226 227 228
## 0.017490762 0.383225049 0.881516151 0.368251585 0.403159901 0.021335473
## 229 230 231 232 233 234
## 0.186154817 0.065007831 0.163325467 0.007060458 0.007152672 0.326868606
## 235 236 237 238 239 240
## 0.206278522 0.013166316 0.957200476 0.261698006 0.005324346 0.451069404
## 241 242 243 244 245 246
## 0.136467836 0.044695481 0.019403807 0.016715924 0.450520283 0.033166008
## 247 248 249 250 251 252
## 0.519122246 0.531722733 0.094212729 0.893162830 0.222590806 0.073969493
## 253 254 255 256
## 0.442904793 0.232248092 0.012997589 0.221824140