Description

This report predicts car purchase decisions using classification algorithms.

This dataset contains details of 1000 customers who intend to buy a car, considering their annual salaries.

Columns: User ID, Gender, Age, Annual Salary, Purchase Decision (No = 0; Yes = 1)

The dataset is hosted on Kaggle. It can be downloaded here: https://www.kaggle.com/datasets/gabrielsantello/cars-purchase-decision-dataset

1. Data Extraction
2. Exploratory Data Analysis
3. Data Preparation
4. Modeling
5. Evaluation
6. Recommendation

Library

library(ggplot2)
library(gridExtra)
library(dplyr)
library(party)
library(randomForest)
library(e1071)

1. Data Extraction

# Read the car purchase CSV from its local path into a data frame.
car_df <- read.csv(file = "D:/DATA SCIENCE/Car Sales Prediction/data/car_data.csv")
# Print the loaded data frame so its rows appear in the report output.
print(car_df)
##      User.ID Gender Age AnnualSalary Purchased
## 1        385   Male  35        20000         0
## 2        681   Male  40        43500         0
## 3        353   Male  49        74000         0
## 4        895   Male  40       107500         1
## 5        661   Male  25        79000         0
## 6        846 Female  47        33500         1
## 7        219 Female  46       132500         1
## 8        588   Male  42        64000         0
## 9         85 Female  30        84500         0
## 10       465   Male  41        52000         0
## 11       686   Male  42        80000         0
## 12       408   Male  47        23000         1
## 13       790 Female  32        72500         0
## 14       116 Female  27        57000         0
## 15       118 Female  42       108000         1
## 16        54 Female  33       149000         1
## 17        90   Male  35        75000         0
## 18       372   Male  35        53000         0
## 19       926   Male  46        79000         1
## 20        94 Female  39       134000         1
## 21       338 Female  39        51500         0
## 22       134 Female  49        39000         1
## 23       821   Male  54        25500         1
## 24       294 Female  41        61500         0
## 25       597 Female  31       117500         0
## 26       567   Male  24        58000         0
## 27        20   Male  40       107000         1
## 28       108   Male  40        97500         1
## 29       105 Female  48        29000         1
## 30       994 Female  38       147500         1
## 31       273   Male  45        26000         1
## 32       263   Male  32        67500         0
## 33       423 Female  37        62000         0
## 34        88   Male  41        79500         0
## 35        81 Female  44       113500         1
## 36       257 Female  47        41500         0
## 37       755 Female  38        55000         0
## 38       249 Female  39       114500         1
## 39       121   Male  42        73000         1
## 40       489   Male  26        15000         0
## 41       555 Female  21        37500         0
## 42       222 Female  59        39500         1
## 43       590 Female  39        66500         0
## 44       645 Female  43        80500         0
## 45       689   Male  49        86000         1
## 46       646   Male  37        75000         0
## 47       964 Female  49        76500         0
## 48       384   Male  28       123000         1
## 49       826 Female  59        48500         1
## 50        27 Female  40        60500         0
## 51       268 Female  38        99500         1
## 52       466   Male  51        35500         1
## 53       411 Female  55       130000         1
## 54       445 Female  23        56500         0
## 55       428 Female  49        43500         1
## 56       974 Female  49        36000         1
## 57       828   Male  48        21500         1
## 58       348 Female  49        98500         0
## 59       730   Male  43        59500         0
## 60       820 Female  35       147000         1
## 61       917 Female  50       109500         1
## 62        15   Male  48        26500         1
## 63       365   Male  49        88000         1
## 64       674 Female  38        72500         0
## 65        30   Male  27        58000         0
## 66       531   Male  35        22000         0
## 67       190 Female  35        71000         0
## 68       160   Male  43       129000         1
## 69       371   Male  41        45000         0
## 70       321   Male  39        42000         0
## 71       230 Female  45        40500         0
## 72       819 Female  51        37500         1
## 73       965   Male  20        86500         0
## 74       351 Female  41        63000         0
## 75       266   Male  58        41500         1
## 76       629 Female  50       115500         1
## 77       891 Female  28        55500         0
## 78       311   Male  34        32500         0
## 79       439 Female  57       106500         1
## 80       861 Female  50        53500         1
## 81       510   Male  49        28000         1
## 82       277   Male  53       151500         1
## 83       417   Male  42        88500         1
## 84       123 Female  27        17000         0
## 85       912 Female  59        76000         1
## 86       667 Female  51       136500         1
## 87       435 Female  49       120500         1
## 88       722 Female  32        35500         0
## 89       892   Male  30        44500         0
## 90       279 Female  27        18500         0
## 91       500 Female  36        50000         0
## 92       684   Male  60       144500         1
## 93       414   Male  38        51000         0
## 94       349 Female  29        38500         0
## 95       456 Female  61       103500         1
## 96       485 Female  33       151500         1
## 97       960   Male  45        75500         1
## 98       233 Female  26        17000         0
## 99       191   Male  30        87000         0
## 100      471   Male  38        60500         0
## 101      977 Female  28        84000         0
## 102      120 Female  50        88000         1
## 103      126   Male  26        80000         0
## 104       23   Male  39        42000         0
## 105      225   Male  26        16000         0
## 106      772 Female  51        21500         1
## 107      398   Male  36        56500         0
## 108      367   Male  47        29500         1
## 109      511 Female  18        44000         0
## 110      775   Male  43       109500         1
## 111      324   Male  61        43500         1
## 112      829 Female  41        30000         0
## 113      478   Male  31        58000         0
## 114      546 Female  29        83000         0
## 115      942 Female  48       108500         1
## 116      571 Female  38       112000         0
## 117      375   Male  59       143000         1
## 118      905 Female  33       136500         1
## 119      132   Male  35        61000         0
## 120      438   Male  28        59000         0
## 121      972   Male  30       107000         1
## 122      596   Male  36        99000         1
## 123      395   Male  40        72500         0
## 124      663 Female  33        41000         0
## 125      903   Male  36        23500         0
## 126       25   Male  49        34500         1
## 127      599 Female  40        72000         0
## 128      563 Female  20        23000         0
## 129      607 Female  37       137000         1
## 130      928 Female  47       144000         1
## 131      453 Female  36        24500         0
## 132      509   Male  40        76500         0
## 133      203 Female  59        88000         1
## 134      450 Female  36        75000         0
## 135      573   Male  42        44500         0
## 136      662   Male  27        89000         0
## 137      651   Male  32        77500         0
## 138       19 Female  48        31500         1
## 139      634 Female  38        81500         0
## 140      647   Male  62       145500         1
## 141      693   Male  38        78500         0
## 142      728   Male  40        75000         0
## 143      520   Male  57        72500         1
## 144      213 Female  41       114500         0
## 145      142   Male  22        89500         0
## 146      322 Female  40        74500         0
## 147      540 Female  39        56500         0
## 148      743 Female  39        73000         0
## 149      887 Female  41        71000         0
## 150      241   Male  28        32000         0
## 151      701   Male  39        62500         0
## 152      848   Male  25        33000         0
## 153      782   Male  43        81500         0
## 154      877   Male  48        74000         1
## 155      924   Male  37        79000         1
## 156       83   Male  36       144000         1
## 157      369 Female  32        16500         0
## 158      579 Female  51       121500         1
## 159      583   Male  30        89000         0
## 160      919   Male  36        39500         0
## 161      957   Male  30       135000         1
## 162      973 Female  63       110500         1
## 163      521   Male  48        90000         1
## 164       41 Female  36        78500         0
## 165       42   Male  31       136500         1
## 166      327   Male  58        61500         1
## 167      347 Female  22        55000         0
## 168      148 Female  46        96000         0
## 169      443 Female  63        48500         1
## 170      700 Female  33        28000         0
## 171      738   Male  35        59000         0
## 172      527   Male  41        87000         1
## 173      742   Male  26        81000         0
## 174      175 Female  44       134500         0
## 175      335   Male  39       120500         1
## 176      996   Male  52        67500         0
## 177      378 Female  53        82000         1
## 178      655   Male  43        74500         1
## 179      104   Male  51        23000         1
## 180      221 Female  55       125000         1
## 181      786 Female  60        35500         1
## 182      536 Female  38        59500         0
## 183      460   Male  24        19000         0
## 184      710 Female  56       106500         1
## 185      524 Female  56        36500         1
## 186      890   Male  37        70000         1
## 187       46 Female  61       109500         1
## 188       95   Male  44        81500         0
## 189       78 Female  45       110500         1
## 190      402 Female  55        40500         1
## 191      264   Male  51       143500         0
## 192       34   Male  50        66500         0
## 193      522 Female  26       118000         0
## 194      156   Male  19        19000         0
## 195      850   Male  43        80500         0
## 196       99   Male  31        90500         0
## 197      462   Male  39        62500         0
## 198      979 Female  53        46500         0
## 199      432 Female  44        65500         0
## 200       69   Male  47        25000         1
## 201      943 Female  59        83000         0
## 202      381 Female  42        52500         0
## 203      958   Male  38        54500         0
## 204      954 Female  41        80000         0
## 205      612   Male  35        88000         0
## 206      267   Male  52        90500         1
## 207      785 Female  51       133500         1
## 208      557   Male  20        49000         0
## 209      935   Male  45        32000         1
## 210      805   Male  62       132500         1
## 211      554 Female  54       144500         1
## 212      990 Female  49       132500         1
## 213      345 Female  20        82000         0
## 214      172   Male  43        63500         0
## 215      429 Female  27        16500         0
## 216      948 Female  23        64500         0
## 217       89 Female  58        95000         1
## 218      139   Male  61        43500         1
## 219      332   Male  21        50500         0
## 220       70 Female  34        42500         0
## 221      868 Female  19        87500         0
## 222      747   Male  42        60500         0
## 223      542 Female  47        75500         0
## 224      882   Male  20        77500         0
## 225      744   Male  36        80500         0
## 226      498   Male  40        78500         0
## 227      934   Male  40       146500         1
## 228      253 Female  55       105500         1
## 229      620 Female  40        64500         0
## 230      778 Female  28        55000         0
## 231      796   Male  36        76500         0
## 232      532 Female  51        45500         0
## 233      242   Male  48        33000         1
## 234      504 Female  24        29500         0
## 235      569   Male  30       149500         1
## 236     1000 Female  40        76500         1
## 237      401 Female  62        90500         1
## 238      229   Male  57        60000         1
## 239       91   Male  31        74000         0
## 240      135 Female  27        31000         0
## 241      906   Male  42       104000         1
## 242      605   Male  49        90500         1
## 243      613   Male  36        92500         1
## 244      196   Male  45        66500         0
## 245      167 Female  31        34000         0
## 246      698   Male  42        98500         1
## 247      565 Female  32        90500         0
## 248      669   Male  28        59500         0
## 249      715   Male  37        61500         0
## 250      258 Female  58        38000         1
## 251      604 Female  57        28500         1
## 252      517 Female  51        37500         1
## 253      228 Female  27        36500         0
## 254      987   Male  20        20500         0
## 255      885   Male  50        36500         1
## 256      415 Female  21        83500         0
## 257      357 Female  57       110500         1
## 258      254   Male  40        77500         0
## 259      235 Female  21        24500         0
## 260      282   Male  30        44500         0
## 261      128   Male  42        73500         0
## 262      703 Female  40        60000         0
## 263      131   Male  42        65000         0
## 264      119   Male  43        73500         1
## 265      921 Female  42       136500         1
## 266      881 Female  49        36500         1
## 267      182   Male  21        72000         0
## 268      112 Female  58       127500         1
## 269      342 Female  29        88500         0
## 270      397   Male  27        81500         0
## 271       35 Female  42        53000         0
## 272      145 Female  38        80000         0
## 273      941 Female  34       150500         1
## 274      736 Female  42        79000         0
## 275       68 Female  35        44500         0
## 276      641   Male  35       116500         0
## 277      668 Female  26        43000         0
## 278      673   Male  38        81500         0
## 279      768 Female  42        81500         0
## 280      117 Female  39        75000         1
## 281      633   Male  47        43000         0
## 282      418 Female  28        44000         0
## 283      993 Female  44       139000         1
## 284      902   Male  58        75500         1
## 285      331 Female  32       119500         1
## 286      918 Female  34        29500         0
## 287      622 Female  45       131000         1
## 288       86   Male  31        88500         0
## 289      392   Male  49        89000         1
## 290      162   Male  45        56500         0
## 291      978   Male  26        81500         0
## 292      624   Male  57        61500         1
## 293      640   Male  56       133000         1
## 294       26 Female  47        47000         0
## 295      741 Female  43        54500         0
## 296      704   Male  34        44500         0
## 297      199 Female  20        27500         0
## 298      130 Female  38       149500         1
## 299      179   Male  50        29500         1
## 300      427 Female  49       141000         1
## 301      275   Male  49        81500         1
## 302      330 Female  35        77000         0
## 303      813 Female  28        32500         0
## 304      390 Female  27        73500         0
## 305      774 Female  38       113000         1
## 306      469   Male  49        28000         1
## 307      652   Male  55        71500         1
## 308      729   Male  40        79500         0
## 309      648 Female  20        82000         0
## 310       92 Female  25        33500         0
## 311      904   Male  27        82500         0
## 312      614 Female  51        98500         1
## 313      756 Female  27       137000         1
## 314      316 Female  39       113500         0
## 315      325 Female  37        78000         1
## 316      953   Male  60        34000         1
## 317      675   Male  31        18500         0
## 318      373 Female  47        51000         1
## 319      879   Male  36        73500         0
## 320      806 Female  20        36000         0
## 321      770   Male  40        43500         0
## 322      387 Female  36        98500         1
## 323      394 Female  60        43500         0
## 324      911 Female  45       140500         1
## 325      922   Male  46        59000         0
## 326      419   Male  38        24500         0
## 327      426 Female  41        72000         0
## 328       10   Male  24        64500         0
## 329      547   Male  42        79500         0
## 330      174   Male  19        25000         0
## 331      467   Male  28        91500         0
## 332      502   Male  57       134500         1
## 333      553   Male  33        19500         0
## 334      293   Male  28        89000         0
## 335      719 Female  59        96500         1
## 336      759   Male  43        66500         0
## 337      740   Male  42       136500         1
## 338       87   Male  19        85000         0
## 339      753 Female  61        49500         1
## 340      389   Male  50        75500         0
## 341      792 Female  34        43000         0
## 342      783   Male  44        74500         0
## 343      487 Female  48        51500         1
## 344       84   Male  37        74000         0
## 345       59 Female  37        51500         0
## 346      153   Male  39       106000         1
## 347      920 Female  42        31500         0
## 348      535   Male  63       104500         1
## 349      883 Female  46        41000         1
## 350      750   Male  38        71500         1
## 351      320 Female  52       143500         1
## 352      458 Female  51        89500         1
## 353      396 Female  26        15000         0
## 354      416 Female  32        69500         0
## 355      575   Male  38        63500         0
## 356      659 Female  48       114500         1
## 357      208 Female  41        52500         0
## 358      534   Male  52        88500         1
## 359       97 Female  61        47500         1
## 360      628 Female  62        44500         0
## 361      682 Female  48        30000         1
## 362      970 Female  34        70500         0
## 363      496   Male  29        43000         0
## 364      762   Male  38        80500         1
## 365      495 Female  50        52500         1
## 366      574 Female  58        34500         1
## 367      514 Female  34        72000         0
## 368      873   Male  24        21500         0
## 369      867 Female  42        73500         0
## 370      272 Female  38        79500         0
## 371      383   Male  48        24500         1
## 372      178   Male  46        27500         1
## 373      739 Female  50        44000         0
## 374       47 Female  48        48500         0
## 375      625 Female  29        56500         0
## 376      801   Male  38        73500         0
## 377      615   Male  61        35500         1
## 378      606   Male  43        55500         0
## 379      206 Female  40       139500         1
## 380      997 Female  41       143500         1
## 381      216   Male  36        59500         0
## 382      898 Female  45        45000         1
## 383       77   Male  19        70000         0
## 384      529 Female  43       133000         0
## 385      769   Male  38        61000         0
## 386      845 Female  55        92500         1
## 387      239 Female  54       104000         1
## 388      459   Male  25        87000         0
## 389      949   Male  31        76000         0
## 390      552   Male  60        42000         1
## 391      317 Female  38        81500         0
## 392      692 Female  36        72500         0
## 393      683 Female  45        92500         1
## 394      209 Female  54       148500         1
## 395      187   Male  29        60500         0
## 396      170 Female  23        28000         0
## 397      244   Male  46        88000         1
## 398      512   Male  26        30000         0
## 399      679 Female  39        81500         0
## 400      838   Male  45       151500         1
## 401      224 Female  36        51500         0
## 402      600 Female  40        62500         0
## 403      808   Male  25        85500         0
## 404      171 Female  52       147500         1
## 405      220 Female  30       116000         0
## 406      956   Male  29        43000         0
## 407      936   Male  40        65000         0
## 408      333   Male  20        71500         0
## 409      854   Male  25        80000         0
## 410       58 Female  43        81500         1
## 411      716 Female  53        91500         1
## 412      773 Female  46       135500         0
## 413      835 Female  56        84500         1
## 414      350 Female  26        80000         0
## 415      164   Male  31        18000         0
## 416      379 Female  62        31500         1
## 417      637   Male  42       149000         1
## 418      841   Male  48        34500         1
## 419      205 Female  36        66500         0
## 420      243 Female  31        89000         0
## 421      361 Female  37        55500         0
## 422      269 Female  41        57500         0
## 423       31 Female  28        59500         0
## 424      745   Male  33        43000         0
## 425      880   Male  63        36500         1
## 426      938 Female  50        36000         1
## 427      910 Female  34        25000         0
## 428      284   Male  29        33500         0
## 429       71   Male  42        46500         0
## 430      143   Male  28        90500         0
## 431      452   Male  21        88000         0
## 432      252 Female  53        34000         1
## 433      810 Female  38        65000         0
## 434      541 Female  21        68000         0
## 435      237   Male  41        66500         0
## 436      931 Female  43       112000         1
## 437      664   Male  47        89500         1
## 438      616   Male  38        57500         0
## 439      566 Female  29        45500         0
## 440      853   Male  38        71000         0
## 441      169 Female  47       107000         1
## 442      472 Female  41        72000         0
## 443      865 Female  29        85500         0
## 444      791 Female  37       146000         1
## 445      878   Male  41        61500         0
## 446      271 Female  36       148500         1
## 447      763   Male  26        32000         0
## 448       98   Male  32        75500         0
## 449      621 Female  26        72000         0
## 450      113   Male  42        80500         0
## 451      576   Male  61       103500         1
## 452      862 Female  53       139500         1
## 453      896   Male  43        77500         0
## 454       32 Female  47        50000         1
## 455      871   Male  31        50500         0
## 456      944 Female  49        84500         0
## 457      888 Female  37       127500         1
## 458      761 Female  52        90000         1
## 459      533   Male  46        23000         1
## 460      245   Male  23        19500         0
## 461      102   Male  31        16500         0
## 462      961   Male  35        72000         0
## 463      976 Female  19        26000         0
## 464      685   Male  32       120000         1
## 465        2 Female  38        63500         0
## 466      858 Female  40        82500         0
## 467      434 Female  19        69500         0
## 468      849 Female  22        63000         0
## 469       66 Female  59       106500         1
## 470      795 Female  46        46500         1
## 471      876   Male  48        44500         0
## 472      843   Male  46       131500         1
## 473      537 Female  41        51000         0
## 474      851 Female  58        47000         1
## 475      163   Male  39       146500         1
## 476      141 Female  57       105500         1
## 477      339   Male  35        79000         0
## 478      262 Female  39        59000         0
## 479      210   Male  44       130500         1
## 480      261 Female  34       114500         0
## 481      720 Female  61        25500         1
## 482      725 Female  38        58500         0
## 483      430 Female  42        75000         0
## 484      425 Female  30        29500         0
## 485      506 Female  28        37000         0
## 486      421 Female  53        39500         1
## 487      157   Male  25        24500         0
## 488      214 Female  60        46000         1
## 489      149   Male  52        24500         1
## 490      609   Male  40       123500         1
## 491      754 Female  33        69000         0
## 492      259   Male  27        17500         0
## 493      309 Female  56       126500         1
## 494      246 Female  38        50000         0
## 495        5   Male  41       108500         1
## 496      138 Female  43        74500         0
## 497      140   Male  47        80500         1
## 498      236 Female  38        94500         1
## 499      777 Female  52       138000         1
## 500      708 Female  27        96000         1
## 501      765   Male  40        57000         0
## 502      334 Female  46        22000         0
## 503      198   Male  32        18000         0
## 504      852 Female  32       117000         0
## 505      490   Male  40        59000         0
## 506      548 Female  39        56500         0
## 507       22 Female  56       131500         1
## 508      587 Female  30        79000         0
## 509       96   Male  41        76500         0
## 510      690 Female  61        40500         1
## 511      463   Male  56        74500         1
## 512      799   Male  35        39000         0
## 513      194 Female  48       131000         1
## 514      836 Female  36        63000         0
## 515      314 Female  38       138500         1
## 516       43 Female  51       146000         1
## 517      913 Female  59        29000         1
## 518       72 Female  28        85500         0
## 519      975   Male  40        72500         1
## 520      270   Male  23        63000         0
## 521      315   Male  56        40500         1
## 522      176   Male  41        73500         0
## 523       49   Male  26        91500         0
## 524       36   Male  20        26500         0
## 525      297   Male  24        84000         0
## 526      985 Female  50        40500         1
## 527      158   Male  42        65000         0
## 528      658 Female  27        58000         0
## 529      341 Female  23        66000         0
## 530      718 Female  47        83500         0
## 531      550   Male  49        91500         1
## 532      901 Female  35        73500         0
## 533      151 Female  57        26000         1
## 534      285   Male  39        96000         1
## 535      570 Female  31        80500         0
## 536      833 Female  59        24500         1
## 537      127   Male  40        81500         1
## 538       76 Female  43       109500         1
## 539      915 Female  42        70000         0
## 540      549 Female  35        50000         0
## 541      217 Female  39        51500         0
## 542      582   Male  50        29500         1
## 543      694 Female  32        86000         0
## 544      186   Male  35        27000         0
## 545      518 Female  40       135500         1
## 546      513 Female  20        22500         0
## 547      147   Male  21        75500         0
## 548      491 Female  49       135500         1
## 549      889   Male  36        51500         0
## 550      907   Male  36        21500         0
## 551      601 Female  58       132500         1
## 552      875 Female  44        74500         0
## 553      455   Male  40       135500         1
## 554      526   Male  20        86000         0
## 555      568   Male  28        21500         0
## 556       75   Male  23        82500         0
## 557      930   Male  48        41000         1
## 558       38 Female  43        76500         0
## 559      713 Female  19        21000         0
## 560      515 Female  30        62000         0
## 561      146   Male  32        18000         0
## 562      991 Female  53        38500         1
## 563      377   Male  44        54500         0
## 564      227   Male  37        55000         0
## 565      406   Male  46        28000         1
## 566       21   Male  36        62500         0
## 567      809   Male  54        70000         1
## 568      630 Female  44        39000         0
## 569      507   Male  51        92500         1
## 570      107 Female  47        97500         0
## 571      966 Female  32       150000         1
## 572      481 Female  62        78500         1
## 573      816 Female  25        90500         0
## 574      893 Female  42        61500         0
## 575      986   Male  35        58000         0
## 576      967   Male  25        22000         0
## 577      982   Male  41        48500         0
## 578      803 Female  58        23000         1
## 579        9   Male  46        33500         1
## 580      822   Male  44        89500         1
## 581      360   Male  35       108000         0
## 582      200 Female  44        74500         0
## 583      610 Female  48        96000         1
## 584      593 Female  45        55500         0
## 585      584   Male  41        60500         0
## 586      925 Female  54       105500         1
## 587      410   Male  22        18000         0
## 588      717   Male  18        82000         0
## 589       33   Male  30        62500         0
## 590      115   Male  40        61000         0
## 591      556   Male  41        59000         0
## 592      636   Male  52        76500         0
## 593      947 Female  21        16000         0
## 594      929 Female  27        81500         0
## 595      442   Male  29        75000         0
## 596      159   Male  29        61000         0
## 597      959   Male  27        90000         0
## 598      152   Male  24        23000         0
## 599      256 Female  37        80000         0
## 600      296 Female  26        35000         0
## 601       37 Female  22        27000         0
## 602      144   Male  53        72000         1
## 603      764 Female  27        53500         0
## 604       51   Male  28        79000         0
## 605      523 Female  44        74500         0
## 606      399 Female  43        71500         0
## 607       61   Male  59       145500         1
## 608      329   Male  24        55000         0
## 609      165   Male  31        81500         0
## 610      362 Female  37        71000         0
## 611      195 Female  52       135500         0
## 612      950   Male  60        83000         1
## 613      644   Male  36       125000         1
## 614      188 Female  25        28500         0
## 615      787 Female  29        86500         0
## 616      493 Female  37        57000         0
## 617       53   Male  60        42000         1
## 618      409 Female  60       124500         1
## 619      260 Female  54       108000         1
## 620        3   Male  20        74000         0
## 621      223 Female  45        81500         0
## 622      344 Female  49        24500         0
## 623       18   Male  63        44500         1
## 624      109   Male  47       118500         1
## 625      864 Female  27        44500         0
## 626      474 Female  38        52500         0
## 627      793   Male  55        39000         1
## 628      632 Female  18        86000         0
## 629      300 Female  18        68000         0
## 630      323   Male  61        84500         1
## 631      847 Female  28        85000         0
## 632      326 Female  26        52000         0
## 633      204 Female  36        73500         0
## 634      619   Male  32        19500         0
## 635      727 Female  35        25000         0
## 636      136 Female  54        35500         1
## 637      468 Female  40        82500         0
## 638      706   Male  43        55500         0
## 639      483 Female  46       114500         1
## 640      298 Female  24        89000         0
## 641      897   Male  37       144000         1
## 642      386 Female  46        74000         0
## 643      447   Male  43       105500         1
## 644      677 Female  48        52500         1
## 645      304   Male  42       108500         1
## 646      380   Male  50        89500         1
## 647      914   Male  30        49000         0
## 648      538   Male  33        31000         0
## 649      650   Male  39        71000         0
## 650      702   Male  40        54500         0
## 651        7 Female  51       134000         0
## 652      305 Female  53        22500         1
## 653      393   Male  37       100500         1
## 654      780 Female  40        80500         0
## 655      654   Male  22        73500         0
## 656      870 Female  41       115500         1
## 657      866 Female  45        82500         1
## 658       12   Male  29        90500         0
## 659      137 Female  60        89500         1
## 660      101   Male  38        93500         1
## 661      623 Female  47        42500         1
## 662      290   Male  34       112000         1
## 663       45   Male  30        17000         0
## 664      234 Female  44        82500         0
## 665      307   Male  29        80500         0
## 666      562   Male  29       124500         1
## 667      376 Female  37        76500         0
## 668      711   Male  50        25500         1
## 669        6 Female  36        45500         0
## 670      788 Female  59        42000         0
## 671      643 Female  57        33000         1
## 672      310   Male  49        34500         1
## 673      530   Male  27        31500         0
## 674      318   Male  49        42500         1
## 675      859 Female  30        84500         0
## 676      336   Male  38        53500         0
## 677      709 Female  41        60000         0
## 678      278 Female  43        91500         1
## 679      413 Female  53        90500         1
## 680      749   Male  35        55000         0
## 681      594   Male  50        45500         0
## 682      366   Male  45        56500         0
## 683      201 Female  34        61500         0
## 684      525 Female  42        72500         0
## 685      666 Female  19        45500         0
## 686       29   Male  36        40500         0
## 687      665   Male  48        81500         0
## 688      444 Female  26        84000         0
## 689       57   Male  26        88500         0
## 690      746 Female  36        54000         0
## 691      834 Female  42        80000         1
## 692      995   Male  41        58500         0
## 693       11   Male  40        57000         0
## 694      653 Female  35        60000         0
## 695      358   Male  39       101500         1
## 696       79   Male  35        91000         1
## 697      671 Female  42        75500         0
## 698      872 Female  42        73500         0
## 699      672   Male  38        61000         0
## 700        8 Female  54        26000         1
## 701      129   Male  47       105000         1
## 702      328   Male  37        77000         0
## 703      161   Male  42        53500         0
## 704      368   Male  36        89500         0
## 705      499 Female  35        23000         0
## 706      308   Male  35        50000         0
## 707      771   Male  28        89500         0
## 708      180   Male  27        33500         0
## 709       44   Male  49       142500         0
## 710      265 Female  42        61500         0
## 711      855   Male  41        73500         0
## 712      363 Female  51        32500         1
## 713      840   Male  36        52000         0
## 714      999   Male  60       131500         1
## 715       17   Male  40        57000         0
## 716      133 Female  39        65500         0
## 717      955   Male  33       101500         1
## 718      894   Male  46       117000         1
## 719      503   Male  35        73000         0
## 720      869   Male  60       102000         1
## 721       48 Female  32       135000         1
## 722      617   Male  19        76000         0
## 723      874 Female  39       128500         1
## 724      578 Female  38        62500         0
## 725      312 Female  55        23500         1
## 726      110   Male  35       113500         1
## 727       13   Male  29        80000         0
## 728      734 Female  47       113000         1
## 729      670   Male  41        72000         0
## 730      603 Female  43        62500         0
## 731      189   Male  44        54500         0
## 732      559   Male  30        76500         0
## 733      226 Female  50       146500         1
## 734      837 Female  52       114000         0
## 735      346 Female  28        58500         0
## 736      733 Female  36       126000         1
## 737      306   Male  37        52000         0
## 738      448   Male  59       135500         1
## 739      585   Male  41        58500         0
## 740      726   Male  23        20000         0
## 741      391   Male  25        59500         0
## 742       56 Female  37        80000         0
## 743      286   Male  39       122000         1
## 744      482 Female  60        30500         1
## 745      595 Female  40       142000         1
## 746      100   Male  47        24500         1
## 747      807   Male  36        74500         0
## 748      111   Male  38        41500         0
## 749      939 Female  55        27500         1
## 750      215 Female  60        28500         1
## 751      449   Male  34       115000         0
## 752       55   Male  27        87500         0
## 753      288 Female  33       118500         0
## 754      817 Female  28        97500         1
## 755      543 Female  29        83000         0
## 756      752 Female  33        51000         0
## 757      516 Female  53       115500         0
## 758      856   Male  38        76500         0
## 759       50   Male  37        53000         0
## 760      627   Male  30        81500         0
## 761      657   Male  38       145500         1
## 762        4 Female  48       119000         1
## 763      433 Female  33       113000         0
## 764      238 Female  50       142500         1
## 765      940   Male  45        79000         0
## 766      193   Male  51        35500         1
## 767       80 Female  35        72000         0
## 768      988   Male  41        79000         0
## 769      784 Female  53       143000         1
## 770      475 Female  55       109500         1
## 771      240 Female  46        82000         0
## 772      969 Female  25        56500         0
## 773      580   Male  18        52000         0
## 774      945   Male  58       144000         1
## 775       74 Female  29        60500         0
## 776      486 Female  50        20000         1
## 777      937 Female  36        58500         0
## 778      422   Male  59        62500         1
## 779      359 Female  48        47500         1
## 780      446 Female  50        37500         1
## 781      231 Female  27        85500         0
## 782      688   Male  27        16500         0
## 783      370 Female  24        49500         0
## 784      699 Female  42        77500         1
## 785      122   Male  35        44500         0
## 786      183 Female  44        62500         0
## 787      484   Male  37       126500         1
## 788      431   Male  63        44500         1
## 789      697   Male  41        72500         1
## 790      678   Male  47        20000         1
## 791      899   Male  27        88000         0
## 792       28 Female  56       104000         1
## 793      313   Male  38        77500         0
## 794      251 Female  31        68000         0
## 795       93   Male  50        87500         1
## 796      125   Male  52        30500         1
## 797      811 Female  49        97500         1
## 798      923 Female  38        74500         0
## 799      352 Female  24        55000         0
## 800      635   Male  42       124500         1
## 801      519 Female  46        32000         1
## 802      551 Female  41        52500         0
## 803      374   Male  60        62500         1
## 804      461 Female  40        59500         0
## 805      714 Female  22        17500         0
## 806      776 Female  40        95500         1
## 807      295   Male  40        55500         0
## 808      276 Female  55       116500         0
## 809      602 Female  28        18500         0
## 810      825 Female  27       119500         0
## 811      705   Male  43        65500         0
## 812      364 Female  41        76500         0
## 813      528 Female  37        64500         0
## 814      103   Male  39       134000         1
## 815       60   Male  48       141000         0
## 816      480   Male  37       119500         1
## 817       73   Male  60        76500         1
## 818      301   Male  37        53500         0
## 819      839 Female  38        79500         1
## 820      400   Male  30        15000         0
## 821      340 Female  53        22500         1
## 822      691 Female  58        27500         1
## 823      247 Female  43       144500         1
## 824       64 Female  40        80500         1
## 825       24 Female  37        93000         1
## 826      842   Male  30        80000         0
## 827      497 Female  45        72500         0
## 828      831 Female  45        22000         1
## 829      436 Female  23        48000         0
## 830       67 Female  38        50000         0
## 831      737   Male  26        23500         0
## 832      983 Female  33        87500         0
## 833      218 Female  27        54000         0
## 834      166   Male  43        67500         0
## 835      454   Male  59       130000         1
## 836      437   Male  42        53500         0
## 837      732 Female  31       118000         1
## 838      281   Male  39        72500         0
## 839      827   Male  31        66000         0
## 840      589   Male  35        38000         0
## 841      723 Female  60        84500         0
## 842      909   Male  38        56500         0
## 843      544   Male  61       146500         1
## 844      992   Male  26        34500         0
## 845      638   Male  39       127500         1
## 846      812   Male  36        28500         0
## 847      403   Male  45        67500         0
## 848      656 Female  48        35000         1
## 849      250   Male  41        63500         0
## 850      564   Male  21        87500         0
## 851      598 Female  42        90000         1
## 852       62   Male  25        20500         0
## 853       40   Male  26        86000         0
## 854      767   Male  37        72000         0
## 855      202 Female  40        72500         0
## 856      591 Female  60       108000         1
## 857      207   Male  19        53500         0
## 858      696 Female  54        83500         1
## 859      984   Male  36        60500         0
## 860      586 Female  41        67500         0
## 861      626 Female  37        33000         0
## 862      724   Male  54        73500         1
## 863      354   Male  41        53500         0
## 864      884   Male  56        60000         1
## 865      758   Male  43       150500         1
## 866      545   Male  49       119500         1
## 867      908 Female  39        61000         0
## 868      248 Female  23        82000         0
## 869      355 Female  31        63500         0
## 870      289   Male  35        75000         0
## 871      804 Female  48       145500         1
## 872      618   Male  39        77000         0
## 873      916 Female  42        73500         0
## 874      292 Female  33       118500         1
## 875      488 Female  47        49000         1
## 876       52   Male  40        71000         1
## 877      815 Female  23        28500         0
## 878      473   Male  41        58500         0
## 879      337 Female  35        47000         0
## 880      639 Female  40       148500         1
## 881      932   Male  36        76500         0
## 882      274   Male  46        23500         1
## 883      451   Male  31       108500         1
## 884       63 Female  55       140500         1
## 885      291 Female  52        41500         1
## 886      155   Male  40        47000         0
## 887      751   Male  45       106500         1
## 888      860   Male  36        60000         0
## 889      789   Male  42        73500         0
## 890      303   Male  52        91500         1
## 891      232   Male  36       118000         1
## 892      181 Female  47        30000         1
## 893      560   Male  43        66500         0
## 894      343 Female  58       123500         1
## 895      501 Female  57       122000         1
## 896      464 Female  49        31500         1
## 897      319   Male  49        75500         1
## 898      952   Male  37       145500         1
## 899      356 Female  42        64500         0
## 900      779   Male  45        22000         1
## 901       14   Male  47        60500         0
## 902      192   Male  39        35500         0
## 903      581 Female  35        26500         0
## 904      494 Female  22        69500         0
## 905      981   Male  37        34500         0
## 906      412 Female  61        97500         1
## 907      505 Female  33        60000         0
## 908       16   Male  55       152500         1
## 909      508   Male  36        54500         0
## 910      211   Male  39        62500         0
## 911      818   Male  48        35500         1
## 912      823 Female  32       117000         1
## 913      106   Male  33        19500         0
## 914      900 Female  21        83500         0
## 915      561   Male  39        72500         0
## 916      797 Female  41        72000         0
## 917      989 Female  24        27000         0
## 918      558 Female  51       140500         1
## 919      441   Male  38        71000         0
## 920      963 Female  52        21000         1
## 921      457   Male  34        43000         0
## 922      802   Male  48       106500         1
## 923      470 Female  29        28000         0
## 924      707 Female  59       102500         1
## 925      712 Female  29        47000         0
## 926      283 Female  50        49500         0
## 927      280   Male  38        75500         0
## 928      388 Female  36        61500         0
## 929      781 Female  28        59000         0
## 930      608   Male  43        59500         0
## 931      766   Male  57        74000         1
## 932      177   Male  36       109500         0
## 933      404 Female  49       139500         1
## 934      968 Female  34        52500         0
## 935      212 Female  47       141500         1
## 936      539 Female  35        65000         0
## 937      886   Male  50       107500         1
## 938      748 Female  41        73500         0
## 939      735   Male  50        90500         1
## 940      173 Female  56       145500         1
## 941      592 Female  35        57000         0
## 942      255   Male  40        78000         0
## 943      824 Female  49        34500         1
## 944      631   Male  25        90000         0
## 945      197   Male  36        33000         0
## 946      405 Female  48       138000         1
## 947      798   Male  42        54000         0
## 948      185   Male  51        76500         1
## 949      492   Male  49        65000         0
## 950      731   Male  46        80500         0
## 951      980   Male  27        20000         0
## 952      660   Male  39        52500         0
## 953      479 Female  35        97000         1
## 954      680   Male  47        34000         1
## 955      830 Female  52        38000         1
## 956      150 Female  49        30500         1
## 957      760 Female  36        26500         0
## 958      857   Male  26        80500         0
## 959      927 Female  38        34500         0
## 960      154 Female  48        50500         1
## 961        1   Male  32       100000         1
## 962      933 Female  40        75000         0
## 963      424 Female  42        81500         0
## 964      814   Male  41        62500         0
## 965      184 Female  24        83500         0
## 966      962   Male  52       150000         1
## 967      611 Female  48       133500         1
## 968      114   Male  41        52000         0
## 969       39   Male  19        83500         0
## 970      721   Male  29       148000         1
## 971      642 Female  58       101000         1
## 972      844 Female  36        48500         0
## 973      440   Male  63        85500         1
## 974      577 Female  35        44000         0
## 975      757   Male  22        81000         0
## 976       65 Female  24        32000         0
## 977      124 Female  39        71000         0
## 978      794 Female  31        15000         0
## 979      998 Female  60        77500         1
## 980      287   Male  32        59500         0
## 981       82 Female  46        23500         1
## 982      695 Female  39        79000         0
## 983      946   Male  39        60500         0
## 984      676   Male  42        54000         0
## 985      649 Female  28        87000         0
## 986      477 Female  45        77500         0
## 987      572   Male  48        33000         1
## 988      832 Female  31        71000         0
## 989      302   Male  33       121500         1
## 990      420 Female  27        84000         0
## 991      971   Male  25        56500         0
## 992      382 Female  30        48500         0
## 993      168 Female  24        67500         0
## 994      476 Female  54       136500         0
## 995      951 Female  53       104000         1
## 996      863   Male  38        59000         0
## 997      800 Female  47        23500         0
## 998      407 Female  28       138500         1
## 999      299 Female  48       134000         1
## 1000     687 Female  44        73500         0

2. Exploratory Data Analysis

# Treat the categorical columns as factors instead of raw character/integer
# values, so that plots and models handle them as classes.
car_df[c("Gender", "Purchased")] <- lapply(
  car_df[c("Gender", "Purchased")],
  factor
)
2.1 Univariate Analysis
# Univariate view: one boxplot per numeric variable, drawn side by side.
up1 <- ggplot(car_df, aes(y = AnnualSalary)) +
  geom_boxplot() +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000))
up2 <- ggplot(car_df, aes(y = Age)) +
  geom_boxplot()
uni_analysis <- grid.arrange(up1, up2, nrow = 1)

2.2 Bivariate Analysis
# Bivariate view: annual salary split by gender (left) and by purchase
# decision (right), both on the same salary tick marks.
bp1 <- ggplot(car_df, aes(x = Gender, y = AnnualSalary)) +
  geom_boxplot() +
  labs(title = "Decision factor by Gender and Annual Salary") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000))
bp2 <- ggplot(car_df, aes(x = Purchased, y = AnnualSalary)) +
  geom_boxplot() +
  labs(title = "Decision factor by Purchased and Annual Salary ") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Relabel the 0/1 purchase codes on the x axis
  scale_x_discrete(breaks = c(0, 1), labels = c("Tidak membeli", "Membeli")) +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000))
biva_analysis <- grid.arrange(bp1, bp2, nrow = 1)

2.3 Multivariate Analysis
# Multivariate view: salary vs. age scatter, coloured by gender and with the
# point shape encoding the purchase decision.
multi_analysis <- ggplot(
  car_df,
  aes(x = AnnualSalary, y = Age, color = Gender, shape = Purchased)
) +
  geom_point() +
  scale_x_continuous(
    breaks = c(40000, 80000, 120000),
    labels = c("40000$", "80000$", "120000$")
  ) +
  labs(title = "Client Distribution by Age and Annual Salary") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_shape_discrete(name = "Status", labels = c("Tdk membeli", "Membeli"))
multi_analysis

2.4 Insight EDA
# 1. The data show that the average annual salary is around 72K.
# 2. The average age of the people who come to buy a car is 40 years.
# 3. The average annual salary of females is higher than that of males.
# 4. People with an annual salary above 80K tend to buy a car and can pay it off,
#    whereas people with an annual salary below 70K tend not to buy one.
# 5. Most people looking for a car are female.
# 6. Based on Pearson's correlation coefficient (about 0.62), age alone is
#    already a fairly strong indicator of whether a person will buy the car.

3. Data Preparation

3.1 Data Cleansing
# Drop the User.ID column so it is not passed to the models as a feature.
car_df[["User.ID"]] <- NULL
3.2 Data Split
# Dimensions of the cleaned data set
m <- nrow(car_df)
n <- ncol(car_df)

# 70% of the rows (rounded down) go to training, the remainder to testing
mtrain <- floor(0.7 * m)
mtest <- m - mtrain

# Fixed seed so the random partition is reproducible
set.seed(2022)
train_idx <- sample(m, size = mtrain)

train_df <- car_df[train_idx, ]
test_df <- car_df[-train_idx, ]

4. Modeling

4.1 Logistic Regression
# Binomial GLM (logistic regression) of Purchased on all remaining columns
model_lr <- glm(
  formula = Purchased ~ .,
  data = train_df,
  family = binomial
)
model_lr
## 
## Call:  glm(formula = Purchased ~ ., family = binomial, data = train_df)
## 
## Coefficients:
##  (Intercept)    GenderMale           Age  AnnualSalary  
##   -1.268e+01     1.035e-01     2.284e-01     3.612e-05  
## 
## Degrees of Freedom: 699 Total (i.e. Null);  696 Residual
## Null Deviance:       940.6 
## Residual Deviance: 496.8     AIC: 504.8
4.2 Decision Tree
# Fit a tree with ctree() on the training rows and draw it
model_ctree <- ctree(
  formula = Purchased ~ .,
  data = train_df
)
plot(model_ctree)

4.3 Random Forest
# Re-seed before fitting so the forest is reproducible
set.seed(2022)
model_forest <- randomForest(
  formula = Purchased ~ .,
  data = train_df
)
model_forest
## 
## Call:
##  randomForest(formula = Purchased ~ ., data = train_df) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##         OOB estimate of  error rate: 9.29%
## Confusion matrix:
##     0   1 class.error
## 0 385  37  0.08767773
## 1  28 250  0.10071942
4.4 Support Vector Machine
# Support vector machine fitted with svm() and its default settings
model_svm <- svm(
  formula = Purchased ~ .,
  data = train_df
)
model_svm
## 
## Call:
## svm(formula = Purchased ~ ., data = train_df)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  192

5. Evaluation

5.1 Grab actual data
# Ground-truth purchase labels of the test rows, compared against each
# model's predictions below.
actual_purchase <- test_df[["Purchased"]]
5.2 Make a compute function

Info :

Not Purchased(0) is negative and Purchased(1) is positive.

#' Print accuracy, precision, recall and F1 score for a binary classifier
#'
#' Cross-tabulates `prediction` against `actual` and derives the metrics from
#' the resulting 2x2 confusion matrix. Classes are matched by position, not by
#' label: the first level is taken as negative (not purchased) and the second
#' level as positive (purchased), so the two arguments may use different
#' labels as long as their level order agrees.
#'
#' @param actual Actual class labels; should be a two-level factor.
#' @param prediction Predicted class labels; should be a two-level factor of
#'   the same length as `actual`.
#' @param model_name Name of the method, shown in the printed report.
#' @return Invisibly, a named numeric vector with elements `accuracy`,
#'   `precision`, `recall` and `f1score` (as fractions, not percentages).
#'   A ratio whose denominator is zero is `NaN`. The report itself is written
#'   with `cat()`.
computeClass <- function(actual, prediction, model_name) {
  # Rows are predictions, columns are actual labels
  confusion <- table(prediction, actual)

  # Indexing [2, 2] below is only meaningful for a full 2x2 table; fail with a
  # clear message instead of "subscript out of bounds".
  if (!identical(dim(confusion), c(2L, 2L))) {
    stop(
      "computeClass() needs a 2x2 confusion matrix but got ",
      paste(dim(confusion), collapse = "x"),
      "; supply `actual` and `prediction` as two-level factors.",
      call. = FALSE
    )
  }

  tp <- confusion[2, 2]
  tn <- confusion[1, 1]
  fp <- confusion[2, 1]
  fn <- confusion[1, 2]

  accuracy <- (tp + tn) / (tp + tn + fp + fn)
  precision <- tp / (tp + fp)
  recall <- tp / (tp + fn)
  f1score <- (2 * precision * recall) / (precision + recall)

  result <- paste0(
    "** Method Name: ", model_name, "\n",
    "Accuracy : ", round(accuracy, 4) * 100, "%", "\n",
    "Precision: ", round(precision, 4) * 100, "%", "\n",
    "Recall   : ", round(recall, 4) * 100, "%", "\n",
    "F1 Score : ", round(f1score, 4) * 100, "%", "\n"
  )
  cat(result)

  invisible(c(
    accuracy = accuracy,
    precision = precision,
    recall = recall,
    f1score = f1score
  ))
}
5.3 Calculation results with each prediction method
# 5.3.1 Logistic Regression
# Predicted purchase probabilities for the held-out rows
prob <- predict(model_lr, newdata = test_df, type = "response")
# NOTE(review): with a 0.99 cut-off a row is labelled "Purchased" only when
# the model is almost certain, which is consistent with the very low recall
# reported below. Confirm whether a conventional 0.5 cut-off was intended
# before changing it, since the recorded results depend on this value.
predict_lr <- factor(
  prob > 0.99,
  levels = c(FALSE, TRUE),
  labels = c("Not Purchased", "Purchased")
)
computeClass(actual_purchase, predict_lr, "Logistic Regression")
## ** Method Name: Logistic Regression
## Accuracy : 63.67%
## Precision: 100%
## Recall   : 12.1%
## F1 Score : 21.58%
# 5.3.2 Decision Tree
# Class predictions of the fitted tree on the test rows, then the metrics
predict_tree <- predict(model_ctree, test_df)
computeClass(actual_purchase, predict_tree, "Decision Tree")
## ** Method Name: Decision Tree
## Accuracy : 88.33%
## Precision: 82.48%
## Recall   : 91.13%
## F1 Score : 86.59%
# 5.3.3 Random Forest
# Class predictions of the fitted forest on the test rows, then the metrics
predict_forest <- predict(model_forest, newdata = test_df)
computeClass(actual_purchase, predict_forest, "Random Forest")
## ** Method Name: Random Forest
## Accuracy : 89%
## Precision: 85.27%
## Recall   : 88.71%
## F1 Score : 86.96%
# 5.3.4 SVM
# Class predictions of the fitted SVM on the test rows, then the metrics
predict_svm <- predict(model_svm, newdata = test_df)
computeClass(actual_purchase, predict_svm, "SVM")
## ** Method Name: SVM
## Accuracy : 88.67%
## Precision: 84.62%
## Recall   : 88.71%
## F1 Score : 86.61%

6. Recommendation

### 1. Prioritizing car deals to clients over the age of 52.
### 2. Targeting car sales to clients who earn $91500 or more.
### 3. Offering cars to clients between the ages of 44 and 52 with an average salary of over $91500.