This report provides car sales prediction using classification algorithms.
This dataset contains details of 1000 customers who intend to buy a car, considering their annual salaries.
Columns: User ID, Gender, Age, Annual Salary, Purchase Decision (No = 0; Yes = 1)
The dataset is hosted on Kaggle. It can be downloaded here: https://www.kaggle.com/datasets/gabrielsantello/cars-purchase-decision-dataset
library(ggplot2)
library(gridExtra)
library(dplyr)
library(party)
library(randomForest)
library(e1071)
# Load the car purchase data set from its CSV file.
# NOTE(review): the path is absolute and machine-specific; a project-relative
# path would make the report reproducible on other machines.
car_df <- read.csv(
  file = "D:/DATA SCIENCE/Car Sales Prediction/data/car_data.csv"
)
# Show the complete data frame in the report.
car_df
## User.ID Gender Age AnnualSalary Purchased
## 1 385 Male 35 20000 0
## 2 681 Male 40 43500 0
## 3 353 Male 49 74000 0
## 4 895 Male 40 107500 1
## 5 661 Male 25 79000 0
## 6 846 Female 47 33500 1
## 7 219 Female 46 132500 1
## 8 588 Male 42 64000 0
## 9 85 Female 30 84500 0
## 10 465 Male 41 52000 0
## 11 686 Male 42 80000 0
## 12 408 Male 47 23000 1
## 13 790 Female 32 72500 0
## 14 116 Female 27 57000 0
## 15 118 Female 42 108000 1
## 16 54 Female 33 149000 1
## 17 90 Male 35 75000 0
## 18 372 Male 35 53000 0
## 19 926 Male 46 79000 1
## 20 94 Female 39 134000 1
## 21 338 Female 39 51500 0
## 22 134 Female 49 39000 1
## 23 821 Male 54 25500 1
## 24 294 Female 41 61500 0
## 25 597 Female 31 117500 0
## 26 567 Male 24 58000 0
## 27 20 Male 40 107000 1
## 28 108 Male 40 97500 1
## 29 105 Female 48 29000 1
## 30 994 Female 38 147500 1
## 31 273 Male 45 26000 1
## 32 263 Male 32 67500 0
## 33 423 Female 37 62000 0
## 34 88 Male 41 79500 0
## 35 81 Female 44 113500 1
## 36 257 Female 47 41500 0
## 37 755 Female 38 55000 0
## 38 249 Female 39 114500 1
## 39 121 Male 42 73000 1
## 40 489 Male 26 15000 0
## 41 555 Female 21 37500 0
## 42 222 Female 59 39500 1
## 43 590 Female 39 66500 0
## 44 645 Female 43 80500 0
## 45 689 Male 49 86000 1
## 46 646 Male 37 75000 0
## 47 964 Female 49 76500 0
## 48 384 Male 28 123000 1
## 49 826 Female 59 48500 1
## 50 27 Female 40 60500 0
## 51 268 Female 38 99500 1
## 52 466 Male 51 35500 1
## 53 411 Female 55 130000 1
## 54 445 Female 23 56500 0
## 55 428 Female 49 43500 1
## 56 974 Female 49 36000 1
## 57 828 Male 48 21500 1
## 58 348 Female 49 98500 0
## 59 730 Male 43 59500 0
## 60 820 Female 35 147000 1
## 61 917 Female 50 109500 1
## 62 15 Male 48 26500 1
## 63 365 Male 49 88000 1
## 64 674 Female 38 72500 0
## 65 30 Male 27 58000 0
## 66 531 Male 35 22000 0
## 67 190 Female 35 71000 0
## 68 160 Male 43 129000 1
## 69 371 Male 41 45000 0
## 70 321 Male 39 42000 0
## 71 230 Female 45 40500 0
## 72 819 Female 51 37500 1
## 73 965 Male 20 86500 0
## 74 351 Female 41 63000 0
## 75 266 Male 58 41500 1
## 76 629 Female 50 115500 1
## 77 891 Female 28 55500 0
## 78 311 Male 34 32500 0
## 79 439 Female 57 106500 1
## 80 861 Female 50 53500 1
## 81 510 Male 49 28000 1
## 82 277 Male 53 151500 1
## 83 417 Male 42 88500 1
## 84 123 Female 27 17000 0
## 85 912 Female 59 76000 1
## 86 667 Female 51 136500 1
## 87 435 Female 49 120500 1
## 88 722 Female 32 35500 0
## 89 892 Male 30 44500 0
## 90 279 Female 27 18500 0
## 91 500 Female 36 50000 0
## 92 684 Male 60 144500 1
## 93 414 Male 38 51000 0
## 94 349 Female 29 38500 0
## 95 456 Female 61 103500 1
## 96 485 Female 33 151500 1
## 97 960 Male 45 75500 1
## 98 233 Female 26 17000 0
## 99 191 Male 30 87000 0
## 100 471 Male 38 60500 0
## 101 977 Female 28 84000 0
## 102 120 Female 50 88000 1
## 103 126 Male 26 80000 0
## 104 23 Male 39 42000 0
## 105 225 Male 26 16000 0
## 106 772 Female 51 21500 1
## 107 398 Male 36 56500 0
## 108 367 Male 47 29500 1
## 109 511 Female 18 44000 0
## 110 775 Male 43 109500 1
## 111 324 Male 61 43500 1
## 112 829 Female 41 30000 0
## 113 478 Male 31 58000 0
## 114 546 Female 29 83000 0
## 115 942 Female 48 108500 1
## 116 571 Female 38 112000 0
## 117 375 Male 59 143000 1
## 118 905 Female 33 136500 1
## 119 132 Male 35 61000 0
## 120 438 Male 28 59000 0
## 121 972 Male 30 107000 1
## 122 596 Male 36 99000 1
## 123 395 Male 40 72500 0
## 124 663 Female 33 41000 0
## 125 903 Male 36 23500 0
## 126 25 Male 49 34500 1
## 127 599 Female 40 72000 0
## 128 563 Female 20 23000 0
## 129 607 Female 37 137000 1
## 130 928 Female 47 144000 1
## 131 453 Female 36 24500 0
## 132 509 Male 40 76500 0
## 133 203 Female 59 88000 1
## 134 450 Female 36 75000 0
## 135 573 Male 42 44500 0
## 136 662 Male 27 89000 0
## 137 651 Male 32 77500 0
## 138 19 Female 48 31500 1
## 139 634 Female 38 81500 0
## 140 647 Male 62 145500 1
## 141 693 Male 38 78500 0
## 142 728 Male 40 75000 0
## 143 520 Male 57 72500 1
## 144 213 Female 41 114500 0
## 145 142 Male 22 89500 0
## 146 322 Female 40 74500 0
## 147 540 Female 39 56500 0
## 148 743 Female 39 73000 0
## 149 887 Female 41 71000 0
## 150 241 Male 28 32000 0
## 151 701 Male 39 62500 0
## 152 848 Male 25 33000 0
## 153 782 Male 43 81500 0
## 154 877 Male 48 74000 1
## 155 924 Male 37 79000 1
## 156 83 Male 36 144000 1
## 157 369 Female 32 16500 0
## 158 579 Female 51 121500 1
## 159 583 Male 30 89000 0
## 160 919 Male 36 39500 0
## 161 957 Male 30 135000 1
## 162 973 Female 63 110500 1
## 163 521 Male 48 90000 1
## 164 41 Female 36 78500 0
## 165 42 Male 31 136500 1
## 166 327 Male 58 61500 1
## 167 347 Female 22 55000 0
## 168 148 Female 46 96000 0
## 169 443 Female 63 48500 1
## 170 700 Female 33 28000 0
## 171 738 Male 35 59000 0
## 172 527 Male 41 87000 1
## 173 742 Male 26 81000 0
## 174 175 Female 44 134500 0
## 175 335 Male 39 120500 1
## 176 996 Male 52 67500 0
## 177 378 Female 53 82000 1
## 178 655 Male 43 74500 1
## 179 104 Male 51 23000 1
## 180 221 Female 55 125000 1
## 181 786 Female 60 35500 1
## 182 536 Female 38 59500 0
## 183 460 Male 24 19000 0
## 184 710 Female 56 106500 1
## 185 524 Female 56 36500 1
## 186 890 Male 37 70000 1
## 187 46 Female 61 109500 1
## 188 95 Male 44 81500 0
## 189 78 Female 45 110500 1
## 190 402 Female 55 40500 1
## 191 264 Male 51 143500 0
## 192 34 Male 50 66500 0
## 193 522 Female 26 118000 0
## 194 156 Male 19 19000 0
## 195 850 Male 43 80500 0
## 196 99 Male 31 90500 0
## 197 462 Male 39 62500 0
## 198 979 Female 53 46500 0
## 199 432 Female 44 65500 0
## 200 69 Male 47 25000 1
## 201 943 Female 59 83000 0
## 202 381 Female 42 52500 0
## 203 958 Male 38 54500 0
## 204 954 Female 41 80000 0
## 205 612 Male 35 88000 0
## 206 267 Male 52 90500 1
## 207 785 Female 51 133500 1
## 208 557 Male 20 49000 0
## 209 935 Male 45 32000 1
## 210 805 Male 62 132500 1
## 211 554 Female 54 144500 1
## 212 990 Female 49 132500 1
## 213 345 Female 20 82000 0
## 214 172 Male 43 63500 0
## 215 429 Female 27 16500 0
## 216 948 Female 23 64500 0
## 217 89 Female 58 95000 1
## 218 139 Male 61 43500 1
## 219 332 Male 21 50500 0
## 220 70 Female 34 42500 0
## 221 868 Female 19 87500 0
## 222 747 Male 42 60500 0
## 223 542 Female 47 75500 0
## 224 882 Male 20 77500 0
## 225 744 Male 36 80500 0
## 226 498 Male 40 78500 0
## 227 934 Male 40 146500 1
## 228 253 Female 55 105500 1
## 229 620 Female 40 64500 0
## 230 778 Female 28 55000 0
## 231 796 Male 36 76500 0
## 232 532 Female 51 45500 0
## 233 242 Male 48 33000 1
## 234 504 Female 24 29500 0
## 235 569 Male 30 149500 1
## 236 1000 Female 40 76500 1
## 237 401 Female 62 90500 1
## 238 229 Male 57 60000 1
## 239 91 Male 31 74000 0
## 240 135 Female 27 31000 0
## 241 906 Male 42 104000 1
## 242 605 Male 49 90500 1
## 243 613 Male 36 92500 1
## 244 196 Male 45 66500 0
## 245 167 Female 31 34000 0
## 246 698 Male 42 98500 1
## 247 565 Female 32 90500 0
## 248 669 Male 28 59500 0
## 249 715 Male 37 61500 0
## 250 258 Female 58 38000 1
## 251 604 Female 57 28500 1
## 252 517 Female 51 37500 1
## 253 228 Female 27 36500 0
## 254 987 Male 20 20500 0
## 255 885 Male 50 36500 1
## 256 415 Female 21 83500 0
## 257 357 Female 57 110500 1
## 258 254 Male 40 77500 0
## 259 235 Female 21 24500 0
## 260 282 Male 30 44500 0
## 261 128 Male 42 73500 0
## 262 703 Female 40 60000 0
## 263 131 Male 42 65000 0
## 264 119 Male 43 73500 1
## 265 921 Female 42 136500 1
## 266 881 Female 49 36500 1
## 267 182 Male 21 72000 0
## 268 112 Female 58 127500 1
## 269 342 Female 29 88500 0
## 270 397 Male 27 81500 0
## 271 35 Female 42 53000 0
## 272 145 Female 38 80000 0
## 273 941 Female 34 150500 1
## 274 736 Female 42 79000 0
## 275 68 Female 35 44500 0
## 276 641 Male 35 116500 0
## 277 668 Female 26 43000 0
## 278 673 Male 38 81500 0
## 279 768 Female 42 81500 0
## 280 117 Female 39 75000 1
## 281 633 Male 47 43000 0
## 282 418 Female 28 44000 0
## 283 993 Female 44 139000 1
## 284 902 Male 58 75500 1
## 285 331 Female 32 119500 1
## 286 918 Female 34 29500 0
## 287 622 Female 45 131000 1
## 288 86 Male 31 88500 0
## 289 392 Male 49 89000 1
## 290 162 Male 45 56500 0
## 291 978 Male 26 81500 0
## 292 624 Male 57 61500 1
## 293 640 Male 56 133000 1
## 294 26 Female 47 47000 0
## 295 741 Female 43 54500 0
## 296 704 Male 34 44500 0
## 297 199 Female 20 27500 0
## 298 130 Female 38 149500 1
## 299 179 Male 50 29500 1
## 300 427 Female 49 141000 1
## 301 275 Male 49 81500 1
## 302 330 Female 35 77000 0
## 303 813 Female 28 32500 0
## 304 390 Female 27 73500 0
## 305 774 Female 38 113000 1
## 306 469 Male 49 28000 1
## 307 652 Male 55 71500 1
## 308 729 Male 40 79500 0
## 309 648 Female 20 82000 0
## 310 92 Female 25 33500 0
## 311 904 Male 27 82500 0
## 312 614 Female 51 98500 1
## 313 756 Female 27 137000 1
## 314 316 Female 39 113500 0
## 315 325 Female 37 78000 1
## 316 953 Male 60 34000 1
## 317 675 Male 31 18500 0
## 318 373 Female 47 51000 1
## 319 879 Male 36 73500 0
## 320 806 Female 20 36000 0
## 321 770 Male 40 43500 0
## 322 387 Female 36 98500 1
## 323 394 Female 60 43500 0
## 324 911 Female 45 140500 1
## 325 922 Male 46 59000 0
## 326 419 Male 38 24500 0
## 327 426 Female 41 72000 0
## 328 10 Male 24 64500 0
## 329 547 Male 42 79500 0
## 330 174 Male 19 25000 0
## 331 467 Male 28 91500 0
## 332 502 Male 57 134500 1
## 333 553 Male 33 19500 0
## 334 293 Male 28 89000 0
## 335 719 Female 59 96500 1
## 336 759 Male 43 66500 0
## 337 740 Male 42 136500 1
## 338 87 Male 19 85000 0
## 339 753 Female 61 49500 1
## 340 389 Male 50 75500 0
## 341 792 Female 34 43000 0
## 342 783 Male 44 74500 0
## 343 487 Female 48 51500 1
## 344 84 Male 37 74000 0
## 345 59 Female 37 51500 0
## 346 153 Male 39 106000 1
## 347 920 Female 42 31500 0
## 348 535 Male 63 104500 1
## 349 883 Female 46 41000 1
## 350 750 Male 38 71500 1
## 351 320 Female 52 143500 1
## 352 458 Female 51 89500 1
## 353 396 Female 26 15000 0
## 354 416 Female 32 69500 0
## 355 575 Male 38 63500 0
## 356 659 Female 48 114500 1
## 357 208 Female 41 52500 0
## 358 534 Male 52 88500 1
## 359 97 Female 61 47500 1
## 360 628 Female 62 44500 0
## 361 682 Female 48 30000 1
## 362 970 Female 34 70500 0
## 363 496 Male 29 43000 0
## 364 762 Male 38 80500 1
## 365 495 Female 50 52500 1
## 366 574 Female 58 34500 1
## 367 514 Female 34 72000 0
## 368 873 Male 24 21500 0
## 369 867 Female 42 73500 0
## 370 272 Female 38 79500 0
## 371 383 Male 48 24500 1
## 372 178 Male 46 27500 1
## 373 739 Female 50 44000 0
## 374 47 Female 48 48500 0
## 375 625 Female 29 56500 0
## 376 801 Male 38 73500 0
## 377 615 Male 61 35500 1
## 378 606 Male 43 55500 0
## 379 206 Female 40 139500 1
## 380 997 Female 41 143500 1
## 381 216 Male 36 59500 0
## 382 898 Female 45 45000 1
## 383 77 Male 19 70000 0
## 384 529 Female 43 133000 0
## 385 769 Male 38 61000 0
## 386 845 Female 55 92500 1
## 387 239 Female 54 104000 1
## 388 459 Male 25 87000 0
## 389 949 Male 31 76000 0
## 390 552 Male 60 42000 1
## 391 317 Female 38 81500 0
## 392 692 Female 36 72500 0
## 393 683 Female 45 92500 1
## 394 209 Female 54 148500 1
## 395 187 Male 29 60500 0
## 396 170 Female 23 28000 0
## 397 244 Male 46 88000 1
## 398 512 Male 26 30000 0
## 399 679 Female 39 81500 0
## 400 838 Male 45 151500 1
## 401 224 Female 36 51500 0
## 402 600 Female 40 62500 0
## 403 808 Male 25 85500 0
## 404 171 Female 52 147500 1
## 405 220 Female 30 116000 0
## 406 956 Male 29 43000 0
## 407 936 Male 40 65000 0
## 408 333 Male 20 71500 0
## 409 854 Male 25 80000 0
## 410 58 Female 43 81500 1
## 411 716 Female 53 91500 1
## 412 773 Female 46 135500 0
## 413 835 Female 56 84500 1
## 414 350 Female 26 80000 0
## 415 164 Male 31 18000 0
## 416 379 Female 62 31500 1
## 417 637 Male 42 149000 1
## 418 841 Male 48 34500 1
## 419 205 Female 36 66500 0
## 420 243 Female 31 89000 0
## 421 361 Female 37 55500 0
## 422 269 Female 41 57500 0
## 423 31 Female 28 59500 0
## 424 745 Male 33 43000 0
## 425 880 Male 63 36500 1
## 426 938 Female 50 36000 1
## 427 910 Female 34 25000 0
## 428 284 Male 29 33500 0
## 429 71 Male 42 46500 0
## 430 143 Male 28 90500 0
## 431 452 Male 21 88000 0
## 432 252 Female 53 34000 1
## 433 810 Female 38 65000 0
## 434 541 Female 21 68000 0
## 435 237 Male 41 66500 0
## 436 931 Female 43 112000 1
## 437 664 Male 47 89500 1
## 438 616 Male 38 57500 0
## 439 566 Female 29 45500 0
## 440 853 Male 38 71000 0
## 441 169 Female 47 107000 1
## 442 472 Female 41 72000 0
## 443 865 Female 29 85500 0
## 444 791 Female 37 146000 1
## 445 878 Male 41 61500 0
## 446 271 Female 36 148500 1
## 447 763 Male 26 32000 0
## 448 98 Male 32 75500 0
## 449 621 Female 26 72000 0
## 450 113 Male 42 80500 0
## 451 576 Male 61 103500 1
## 452 862 Female 53 139500 1
## 453 896 Male 43 77500 0
## 454 32 Female 47 50000 1
## 455 871 Male 31 50500 0
## 456 944 Female 49 84500 0
## 457 888 Female 37 127500 1
## 458 761 Female 52 90000 1
## 459 533 Male 46 23000 1
## 460 245 Male 23 19500 0
## 461 102 Male 31 16500 0
## 462 961 Male 35 72000 0
## 463 976 Female 19 26000 0
## 464 685 Male 32 120000 1
## 465 2 Female 38 63500 0
## 466 858 Female 40 82500 0
## 467 434 Female 19 69500 0
## 468 849 Female 22 63000 0
## 469 66 Female 59 106500 1
## 470 795 Female 46 46500 1
## 471 876 Male 48 44500 0
## 472 843 Male 46 131500 1
## 473 537 Female 41 51000 0
## 474 851 Female 58 47000 1
## 475 163 Male 39 146500 1
## 476 141 Female 57 105500 1
## 477 339 Male 35 79000 0
## 478 262 Female 39 59000 0
## 479 210 Male 44 130500 1
## 480 261 Female 34 114500 0
## 481 720 Female 61 25500 1
## 482 725 Female 38 58500 0
## 483 430 Female 42 75000 0
## 484 425 Female 30 29500 0
## 485 506 Female 28 37000 0
## 486 421 Female 53 39500 1
## 487 157 Male 25 24500 0
## 488 214 Female 60 46000 1
## 489 149 Male 52 24500 1
## 490 609 Male 40 123500 1
## 491 754 Female 33 69000 0
## 492 259 Male 27 17500 0
## 493 309 Female 56 126500 1
## 494 246 Female 38 50000 0
## 495 5 Male 41 108500 1
## 496 138 Female 43 74500 0
## 497 140 Male 47 80500 1
## 498 236 Female 38 94500 1
## 499 777 Female 52 138000 1
## 500 708 Female 27 96000 1
## 501 765 Male 40 57000 0
## 502 334 Female 46 22000 0
## 503 198 Male 32 18000 0
## 504 852 Female 32 117000 0
## 505 490 Male 40 59000 0
## 506 548 Female 39 56500 0
## 507 22 Female 56 131500 1
## 508 587 Female 30 79000 0
## 509 96 Male 41 76500 0
## 510 690 Female 61 40500 1
## 511 463 Male 56 74500 1
## 512 799 Male 35 39000 0
## 513 194 Female 48 131000 1
## 514 836 Female 36 63000 0
## 515 314 Female 38 138500 1
## 516 43 Female 51 146000 1
## 517 913 Female 59 29000 1
## 518 72 Female 28 85500 0
## 519 975 Male 40 72500 1
## 520 270 Male 23 63000 0
## 521 315 Male 56 40500 1
## 522 176 Male 41 73500 0
## 523 49 Male 26 91500 0
## 524 36 Male 20 26500 0
## 525 297 Male 24 84000 0
## 526 985 Female 50 40500 1
## 527 158 Male 42 65000 0
## 528 658 Female 27 58000 0
## 529 341 Female 23 66000 0
## 530 718 Female 47 83500 0
## 531 550 Male 49 91500 1
## 532 901 Female 35 73500 0
## 533 151 Female 57 26000 1
## 534 285 Male 39 96000 1
## 535 570 Female 31 80500 0
## 536 833 Female 59 24500 1
## 537 127 Male 40 81500 1
## 538 76 Female 43 109500 1
## 539 915 Female 42 70000 0
## 540 549 Female 35 50000 0
## 541 217 Female 39 51500 0
## 542 582 Male 50 29500 1
## 543 694 Female 32 86000 0
## 544 186 Male 35 27000 0
## 545 518 Female 40 135500 1
## 546 513 Female 20 22500 0
## 547 147 Male 21 75500 0
## 548 491 Female 49 135500 1
## 549 889 Male 36 51500 0
## 550 907 Male 36 21500 0
## 551 601 Female 58 132500 1
## 552 875 Female 44 74500 0
## 553 455 Male 40 135500 1
## 554 526 Male 20 86000 0
## 555 568 Male 28 21500 0
## 556 75 Male 23 82500 0
## 557 930 Male 48 41000 1
## 558 38 Female 43 76500 0
## 559 713 Female 19 21000 0
## 560 515 Female 30 62000 0
## 561 146 Male 32 18000 0
## 562 991 Female 53 38500 1
## 563 377 Male 44 54500 0
## 564 227 Male 37 55000 0
## 565 406 Male 46 28000 1
## 566 21 Male 36 62500 0
## 567 809 Male 54 70000 1
## 568 630 Female 44 39000 0
## 569 507 Male 51 92500 1
## 570 107 Female 47 97500 0
## 571 966 Female 32 150000 1
## 572 481 Female 62 78500 1
## 573 816 Female 25 90500 0
## 574 893 Female 42 61500 0
## 575 986 Male 35 58000 0
## 576 967 Male 25 22000 0
## 577 982 Male 41 48500 0
## 578 803 Female 58 23000 1
## 579 9 Male 46 33500 1
## 580 822 Male 44 89500 1
## 581 360 Male 35 108000 0
## 582 200 Female 44 74500 0
## 583 610 Female 48 96000 1
## 584 593 Female 45 55500 0
## 585 584 Male 41 60500 0
## 586 925 Female 54 105500 1
## 587 410 Male 22 18000 0
## 588 717 Male 18 82000 0
## 589 33 Male 30 62500 0
## 590 115 Male 40 61000 0
## 591 556 Male 41 59000 0
## 592 636 Male 52 76500 0
## 593 947 Female 21 16000 0
## 594 929 Female 27 81500 0
## 595 442 Male 29 75000 0
## 596 159 Male 29 61000 0
## 597 959 Male 27 90000 0
## 598 152 Male 24 23000 0
## 599 256 Female 37 80000 0
## 600 296 Female 26 35000 0
## 601 37 Female 22 27000 0
## 602 144 Male 53 72000 1
## 603 764 Female 27 53500 0
## 604 51 Male 28 79000 0
## 605 523 Female 44 74500 0
## 606 399 Female 43 71500 0
## 607 61 Male 59 145500 1
## 608 329 Male 24 55000 0
## 609 165 Male 31 81500 0
## 610 362 Female 37 71000 0
## 611 195 Female 52 135500 0
## 612 950 Male 60 83000 1
## 613 644 Male 36 125000 1
## 614 188 Female 25 28500 0
## 615 787 Female 29 86500 0
## 616 493 Female 37 57000 0
## 617 53 Male 60 42000 1
## 618 409 Female 60 124500 1
## 619 260 Female 54 108000 1
## 620 3 Male 20 74000 0
## 621 223 Female 45 81500 0
## 622 344 Female 49 24500 0
## 623 18 Male 63 44500 1
## 624 109 Male 47 118500 1
## 625 864 Female 27 44500 0
## 626 474 Female 38 52500 0
## 627 793 Male 55 39000 1
## 628 632 Female 18 86000 0
## 629 300 Female 18 68000 0
## 630 323 Male 61 84500 1
## 631 847 Female 28 85000 0
## 632 326 Female 26 52000 0
## 633 204 Female 36 73500 0
## 634 619 Male 32 19500 0
## 635 727 Female 35 25000 0
## 636 136 Female 54 35500 1
## 637 468 Female 40 82500 0
## 638 706 Male 43 55500 0
## 639 483 Female 46 114500 1
## 640 298 Female 24 89000 0
## 641 897 Male 37 144000 1
## 642 386 Female 46 74000 0
## 643 447 Male 43 105500 1
## 644 677 Female 48 52500 1
## 645 304 Male 42 108500 1
## 646 380 Male 50 89500 1
## 647 914 Male 30 49000 0
## 648 538 Male 33 31000 0
## 649 650 Male 39 71000 0
## 650 702 Male 40 54500 0
## 651 7 Female 51 134000 0
## 652 305 Female 53 22500 1
## 653 393 Male 37 100500 1
## 654 780 Female 40 80500 0
## 655 654 Male 22 73500 0
## 656 870 Female 41 115500 1
## 657 866 Female 45 82500 1
## 658 12 Male 29 90500 0
## 659 137 Female 60 89500 1
## 660 101 Male 38 93500 1
## 661 623 Female 47 42500 1
## 662 290 Male 34 112000 1
## 663 45 Male 30 17000 0
## 664 234 Female 44 82500 0
## 665 307 Male 29 80500 0
## 666 562 Male 29 124500 1
## 667 376 Female 37 76500 0
## 668 711 Male 50 25500 1
## 669 6 Female 36 45500 0
## 670 788 Female 59 42000 0
## 671 643 Female 57 33000 1
## 672 310 Male 49 34500 1
## 673 530 Male 27 31500 0
## 674 318 Male 49 42500 1
## 675 859 Female 30 84500 0
## 676 336 Male 38 53500 0
## 677 709 Female 41 60000 0
## 678 278 Female 43 91500 1
## 679 413 Female 53 90500 1
## 680 749 Male 35 55000 0
## 681 594 Male 50 45500 0
## 682 366 Male 45 56500 0
## 683 201 Female 34 61500 0
## 684 525 Female 42 72500 0
## 685 666 Female 19 45500 0
## 686 29 Male 36 40500 0
## 687 665 Male 48 81500 0
## 688 444 Female 26 84000 0
## 689 57 Male 26 88500 0
## 690 746 Female 36 54000 0
## 691 834 Female 42 80000 1
## 692 995 Male 41 58500 0
## 693 11 Male 40 57000 0
## 694 653 Female 35 60000 0
## 695 358 Male 39 101500 1
## 696 79 Male 35 91000 1
## 697 671 Female 42 75500 0
## 698 872 Female 42 73500 0
## 699 672 Male 38 61000 0
## 700 8 Female 54 26000 1
## 701 129 Male 47 105000 1
## 702 328 Male 37 77000 0
## 703 161 Male 42 53500 0
## 704 368 Male 36 89500 0
## 705 499 Female 35 23000 0
## 706 308 Male 35 50000 0
## 707 771 Male 28 89500 0
## 708 180 Male 27 33500 0
## 709 44 Male 49 142500 0
## 710 265 Female 42 61500 0
## 711 855 Male 41 73500 0
## 712 363 Female 51 32500 1
## 713 840 Male 36 52000 0
## 714 999 Male 60 131500 1
## 715 17 Male 40 57000 0
## 716 133 Female 39 65500 0
## 717 955 Male 33 101500 1
## 718 894 Male 46 117000 1
## 719 503 Male 35 73000 0
## 720 869 Male 60 102000 1
## 721 48 Female 32 135000 1
## 722 617 Male 19 76000 0
## 723 874 Female 39 128500 1
## 724 578 Female 38 62500 0
## 725 312 Female 55 23500 1
## 726 110 Male 35 113500 1
## 727 13 Male 29 80000 0
## 728 734 Female 47 113000 1
## 729 670 Male 41 72000 0
## 730 603 Female 43 62500 0
## 731 189 Male 44 54500 0
## 732 559 Male 30 76500 0
## 733 226 Female 50 146500 1
## 734 837 Female 52 114000 0
## 735 346 Female 28 58500 0
## 736 733 Female 36 126000 1
## 737 306 Male 37 52000 0
## 738 448 Male 59 135500 1
## 739 585 Male 41 58500 0
## 740 726 Male 23 20000 0
## 741 391 Male 25 59500 0
## 742 56 Female 37 80000 0
## 743 286 Male 39 122000 1
## 744 482 Female 60 30500 1
## 745 595 Female 40 142000 1
## 746 100 Male 47 24500 1
## 747 807 Male 36 74500 0
## 748 111 Male 38 41500 0
## 749 939 Female 55 27500 1
## 750 215 Female 60 28500 1
## 751 449 Male 34 115000 0
## 752 55 Male 27 87500 0
## 753 288 Female 33 118500 0
## 754 817 Female 28 97500 1
## 755 543 Female 29 83000 0
## 756 752 Female 33 51000 0
## 757 516 Female 53 115500 0
## 758 856 Male 38 76500 0
## 759 50 Male 37 53000 0
## 760 627 Male 30 81500 0
## 761 657 Male 38 145500 1
## 762 4 Female 48 119000 1
## 763 433 Female 33 113000 0
## 764 238 Female 50 142500 1
## 765 940 Male 45 79000 0
## 766 193 Male 51 35500 1
## 767 80 Female 35 72000 0
## 768 988 Male 41 79000 0
## 769 784 Female 53 143000 1
## 770 475 Female 55 109500 1
## 771 240 Female 46 82000 0
## 772 969 Female 25 56500 0
## 773 580 Male 18 52000 0
## 774 945 Male 58 144000 1
## 775 74 Female 29 60500 0
## 776 486 Female 50 20000 1
## 777 937 Female 36 58500 0
## 778 422 Male 59 62500 1
## 779 359 Female 48 47500 1
## 780 446 Female 50 37500 1
## 781 231 Female 27 85500 0
## 782 688 Male 27 16500 0
## 783 370 Female 24 49500 0
## 784 699 Female 42 77500 1
## 785 122 Male 35 44500 0
## 786 183 Female 44 62500 0
## 787 484 Male 37 126500 1
## 788 431 Male 63 44500 1
## 789 697 Male 41 72500 1
## 790 678 Male 47 20000 1
## 791 899 Male 27 88000 0
## 792 28 Female 56 104000 1
## 793 313 Male 38 77500 0
## 794 251 Female 31 68000 0
## 795 93 Male 50 87500 1
## 796 125 Male 52 30500 1
## 797 811 Female 49 97500 1
## 798 923 Female 38 74500 0
## 799 352 Female 24 55000 0
## 800 635 Male 42 124500 1
## 801 519 Female 46 32000 1
## 802 551 Female 41 52500 0
## 803 374 Male 60 62500 1
## 804 461 Female 40 59500 0
## 805 714 Female 22 17500 0
## 806 776 Female 40 95500 1
## 807 295 Male 40 55500 0
## 808 276 Female 55 116500 0
## 809 602 Female 28 18500 0
## 810 825 Female 27 119500 0
## 811 705 Male 43 65500 0
## 812 364 Female 41 76500 0
## 813 528 Female 37 64500 0
## 814 103 Male 39 134000 1
## 815 60 Male 48 141000 0
## 816 480 Male 37 119500 1
## 817 73 Male 60 76500 1
## 818 301 Male 37 53500 0
## 819 839 Female 38 79500 1
## 820 400 Male 30 15000 0
## 821 340 Female 53 22500 1
## 822 691 Female 58 27500 1
## 823 247 Female 43 144500 1
## 824 64 Female 40 80500 1
## 825 24 Female 37 93000 1
## 826 842 Male 30 80000 0
## 827 497 Female 45 72500 0
## 828 831 Female 45 22000 1
## 829 436 Female 23 48000 0
## 830 67 Female 38 50000 0
## 831 737 Male 26 23500 0
## 832 983 Female 33 87500 0
## 833 218 Female 27 54000 0
## 834 166 Male 43 67500 0
## 835 454 Male 59 130000 1
## 836 437 Male 42 53500 0
## 837 732 Female 31 118000 1
## 838 281 Male 39 72500 0
## 839 827 Male 31 66000 0
## 840 589 Male 35 38000 0
## 841 723 Female 60 84500 0
## 842 909 Male 38 56500 0
## 843 544 Male 61 146500 1
## 844 992 Male 26 34500 0
## 845 638 Male 39 127500 1
## 846 812 Male 36 28500 0
## 847 403 Male 45 67500 0
## 848 656 Female 48 35000 1
## 849 250 Male 41 63500 0
## 850 564 Male 21 87500 0
## 851 598 Female 42 90000 1
## 852 62 Male 25 20500 0
## 853 40 Male 26 86000 0
## 854 767 Male 37 72000 0
## 855 202 Female 40 72500 0
## 856 591 Female 60 108000 1
## 857 207 Male 19 53500 0
## 858 696 Female 54 83500 1
## 859 984 Male 36 60500 0
## 860 586 Female 41 67500 0
## 861 626 Female 37 33000 0
## 862 724 Male 54 73500 1
## 863 354 Male 41 53500 0
## 864 884 Male 56 60000 1
## 865 758 Male 43 150500 1
## 866 545 Male 49 119500 1
## 867 908 Female 39 61000 0
## 868 248 Female 23 82000 0
## 869 355 Female 31 63500 0
## 870 289 Male 35 75000 0
## 871 804 Female 48 145500 1
## 872 618 Male 39 77000 0
## 873 916 Female 42 73500 0
## 874 292 Female 33 118500 1
## 875 488 Female 47 49000 1
## 876 52 Male 40 71000 1
## 877 815 Female 23 28500 0
## 878 473 Male 41 58500 0
## 879 337 Female 35 47000 0
## 880 639 Female 40 148500 1
## 881 932 Male 36 76500 0
## 882 274 Male 46 23500 1
## 883 451 Male 31 108500 1
## 884 63 Female 55 140500 1
## 885 291 Female 52 41500 1
## 886 155 Male 40 47000 0
## 887 751 Male 45 106500 1
## 888 860 Male 36 60000 0
## 889 789 Male 42 73500 0
## 890 303 Male 52 91500 1
## 891 232 Male 36 118000 1
## 892 181 Female 47 30000 1
## 893 560 Male 43 66500 0
## 894 343 Female 58 123500 1
## 895 501 Female 57 122000 1
## 896 464 Female 49 31500 1
## 897 319 Male 49 75500 1
## 898 952 Male 37 145500 1
## 899 356 Female 42 64500 0
## 900 779 Male 45 22000 1
## 901 14 Male 47 60500 0
## 902 192 Male 39 35500 0
## 903 581 Female 35 26500 0
## 904 494 Female 22 69500 0
## 905 981 Male 37 34500 0
## 906 412 Female 61 97500 1
## 907 505 Female 33 60000 0
## 908 16 Male 55 152500 1
## 909 508 Male 36 54500 0
## 910 211 Male 39 62500 0
## 911 818 Male 48 35500 1
## 912 823 Female 32 117000 1
## 913 106 Male 33 19500 0
## 914 900 Female 21 83500 0
## 915 561 Male 39 72500 0
## 916 797 Female 41 72000 0
## 917 989 Female 24 27000 0
## 918 558 Female 51 140500 1
## 919 441 Male 38 71000 0
## 920 963 Female 52 21000 1
## 921 457 Male 34 43000 0
## 922 802 Male 48 106500 1
## 923 470 Female 29 28000 0
## 924 707 Female 59 102500 1
## 925 712 Female 29 47000 0
## 926 283 Female 50 49500 0
## 927 280 Male 38 75500 0
## 928 388 Female 36 61500 0
## 929 781 Female 28 59000 0
## 930 608 Male 43 59500 0
## 931 766 Male 57 74000 1
## 932 177 Male 36 109500 0
## 933 404 Female 49 139500 1
## 934 968 Female 34 52500 0
## 935 212 Female 47 141500 1
## 936 539 Female 35 65000 0
## 937 886 Male 50 107500 1
## 938 748 Female 41 73500 0
## 939 735 Male 50 90500 1
## 940 173 Female 56 145500 1
## 941 592 Female 35 57000 0
## 942 255 Male 40 78000 0
## 943 824 Female 49 34500 1
## 944 631 Male 25 90000 0
## 945 197 Male 36 33000 0
## 946 405 Female 48 138000 1
## 947 798 Male 42 54000 0
## 948 185 Male 51 76500 1
## 949 492 Male 49 65000 0
## 950 731 Male 46 80500 0
## 951 980 Male 27 20000 0
## 952 660 Male 39 52500 0
## 953 479 Female 35 97000 1
## 954 680 Male 47 34000 1
## 955 830 Female 52 38000 1
## 956 150 Female 49 30500 1
## 957 760 Female 36 26500 0
## 958 857 Male 26 80500 0
## 959 927 Female 38 34500 0
## 960 154 Female 48 50500 1
## 961 1 Male 32 100000 1
## 962 933 Female 40 75000 0
## 963 424 Female 42 81500 0
## 964 814 Male 41 62500 0
## 965 184 Female 24 83500 0
## 966 962 Male 52 150000 1
## 967 611 Female 48 133500 1
## 968 114 Male 41 52000 0
## 969 39 Male 19 83500 0
## 970 721 Male 29 148000 1
## 971 642 Female 58 101000 1
## 972 844 Female 36 48500 0
## 973 440 Male 63 85500 1
## 974 577 Female 35 44000 0
## 975 757 Male 22 81000 0
## 976 65 Female 24 32000 0
## 977 124 Female 39 71000 0
## 978 794 Female 31 15000 0
## 979 998 Female 60 77500 1
## 980 287 Male 32 59500 0
## 981 82 Female 46 23500 1
## 982 695 Female 39 79000 0
## 983 946 Male 39 60500 0
## 984 676 Male 42 54000 0
## 985 649 Female 28 87000 0
## 986 477 Female 45 77500 0
## 987 572 Male 48 33000 1
## 988 832 Female 31 71000 0
## 989 302 Male 33 121500 1
## 990 420 Female 27 84000 0
## 991 971 Male 25 56500 0
## 992 382 Female 30 48500 0
## 993 168 Female 24 67500 0
## 994 476 Female 54 136500 0
## 995 951 Female 53 104000 1
## 996 863 Male 38 59000 0
## 997 800 Female 47 23500 0
## 998 407 Female 28 138500 1
## 999 299 Female 48 134000 1
## 1000 687 Female 44 73500 0
# Treat Gender and Purchased as categorical (factor) columns.
car_df[c("Gender", "Purchased")] <- lapply(
  car_df[c("Gender", "Purchased")],
  factor
)
# Univariate analysis: one boxplot for annual salary and one for age,
# drawn side by side in a single row.
up1 <- ggplot(car_df, mapping = aes(y = AnnualSalary)) +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000)) +
  geom_boxplot()
up2 <- ggplot(car_df, mapping = aes(y = Age)) +
  geom_boxplot()
uni_analysis <- grid.arrange(up1, up2, nrow = 1)
# Bivariate analysis: annual salary split by gender (left) and by purchase
# decision (right), drawn side by side in a single row.
bp1 <- ggplot(car_df, mapping = aes(x = Gender, y = AnnualSalary)) +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000)) +
  labs(title = "Decision factor by Gender and Annual Salary") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_boxplot()
# The x-axis labels are Indonesian for "did not buy" / "bought".
bp2 <- ggplot(car_df, mapping = aes(x = Purchased, y = AnnualSalary)) +
  scale_x_discrete(
    breaks = c(0, 1),
    labels = c("Tidak membeli", "Membeli")
  ) +
  scale_y_continuous(breaks = c(40000, 60000, 70000, 80000, 120000)) +
  labs(title = "Decision factor by Purchased and Annual Salary ") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_boxplot()
biva_analysis <- grid.arrange(bp1, bp2, nrow = 1)
# Multivariate analysis: scatter plot of age against annual salary, with
# gender mapped to colour and the purchase decision mapped to point shape.
multi_analysis <- ggplot(
  car_df,
  mapping = aes(
    x = AnnualSalary,
    y = Age,
    colour = Gender,
    shape = Purchased
  )
) +
  scale_x_continuous(
    breaks = c(40000, 80000, 120000),
    labels = c("40000$", "80000$", "120000$")
  ) +
  # Legend labels are Indonesian for "did not buy" / "bought".
  scale_shape_discrete(name = "Status", labels = c("Tdk membeli", "Membeli")) +
  labs(title = "Client Distribution by Age and Annual Salary") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point()
multi_analysis
# 1. The data show that the average annual salary is around 72K.
# 2. The average person who comes to buy a car is about 40 years old.
# 3. The average annual salary of females is higher than that of males.
# 4. People with an annual salary above 80K tend to buy a car and can pay it off,
# whereas people with an annual salary below 70K tend not to buy a car.
# 5. Most people looking for a car are female.
# 6. Based on Pearson's correlation coefficient, age alone correlates about 62% with the decision to buy the car.
# Drop the identifier column so the `Purchased ~ .` model formulas do not
# pick it up as a predictor.
car_df[["User.ID"]] <- NULL

# Number of rows (m) and columns (n) left in the data.
m <- nrow(car_df)
n <- ncol(car_df)

# 70 % of the rows go to training; the remainder is held out for testing.
mtrain <- floor(0.7 * m)
mtest <- m - mtrain

# Fixed seed so the random split is reproducible.
set.seed(2022)
train_idx <- sample(m, size = mtrain)
train_df <- car_df[train_idx, ]
test_df <- car_df[-train_idx, ]
# Logistic regression of Purchased on every other column of the training set.
model_lr <- glm(
  formula = Purchased ~ .,
  family = binomial,
  data = train_df
)
model_lr
##
## Call: glm(formula = Purchased ~ ., family = binomial, data = train_df)
##
## Coefficients:
## (Intercept) GenderMale Age AnnualSalary
## -1.268e+01 1.035e-01 2.284e-01 3.612e-05
##
## Degrees of Freedom: 699 Total (i.e. Null); 696 Residual
## Null Deviance: 940.6
## Residual Deviance: 496.8 AIC: 504.8
# Decision tree (ctree) of Purchased on every other column of the training
# set, followed by a plot of the fitted tree.
model_ctree <- ctree(Purchased ~ ., data = train_df)
plot(model_ctree)
# Random forest of Purchased on every other column of the training set.
# The seed is reset first so the fitted forest is reproducible.
set.seed(2022)
model_forest <- randomForest(Purchased ~ ., data = train_df)
model_forest
##
## Call:
## randomForest(formula = Purchased ~ ., data = train_df)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 1
##
## OOB estimate of error rate: 9.29%
## Confusion matrix:
## 0 1 class.error
## 0 385 37 0.08767773
## 1 28 250 0.10071942
# Support vector machine of Purchased on every other column of the training
# set, using the default settings of svm().
model_svm <- svm(Purchased ~ ., data = train_df)
model_svm
##
## Call:
## svm(formula = Purchased ~ ., data = train_df)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 192
# Observed purchase decisions of the test rows; every model's predictions
# are compared against these.
actual_purchase <- test_df[["Purchased"]]
Note:
"Not Purchased" (0) is the negative class and "Purchased" (1) is the positive class.
## Print accuracy, precision, recall and F1 score for a binary classifier.
##
## Arguments:
##   actual      Observed classes (factor, or a vector coercible to one).
##   prediction  Predicted classes, same length as `actual`.
##   model_name  Label printed in the report header.
##
## Both inputs must have exactly two levels. Cells of the confusion table are
## addressed by position, so the FIRST level is taken as the negative class
## and the SECOND level as the positive class; the level labels themselves
## may differ between `actual` and `prediction`.
##
## Returns, invisibly, a list with the confusion table (`confusion`) and the
## unrounded `accuracy`, `precision`, `recall` and `f1score`. A metric whose
## denominator is zero is NaN.
computeClass <- function(actual, prediction, model_name) {
  actual <- as.factor(actual)
  prediction <- as.factor(prediction)

  # Fail with a clear message instead of a "subscript out of bounds" error
  # (or silently ignoring extra levels) when the table would not be 2 x 2.
  if (length(actual) != length(prediction)) {
    stop(
      "computeClass(): 'actual' and 'prediction' must have the same length.",
      call. = FALSE
    )
  }
  if (nlevels(actual) != 2L || nlevels(prediction) != 2L) {
    stop(
      "computeClass() needs two-level inputs; got ", nlevels(actual),
      " level(s) in 'actual' and ", nlevels(prediction),
      " level(s) in 'prediction'.",
      call. = FALSE
    )
  }

  # Rows are predictions, columns are observed classes.
  confusion <- table(prediction, actual)
  tp <- confusion[2, 2]
  tn <- confusion[1, 1]
  fp <- confusion[2, 1]
  fn <- confusion[1, 2]

  accuracy <- (tp + tn) / (tp + tn + fp + fn)
  precision <- tp / (tp + fp)
  recall <- tp / (tp + fn)
  f1score <- (2 * precision * recall) / (precision + recall)

  result <- paste0(
    '** Method Name: ', model_name, '\n',
    'Accuracy : ', round(accuracy, 4) * 100, '%', '\n',
    'Precision: ', round(precision, 4) * 100, '%', '\n',
    'Recall : ', round(recall, 4) * 100, '%', '\n',
    'F1 Score : ', round(f1score, 4) * 100, '%', '\n'
  )
  cat(result)

  # Returned invisibly so existing top-level calls print nothing extra.
  invisible(list(
    confusion = confusion,
    accuracy = accuracy,
    precision = precision,
    recall = recall,
    f1score = f1score
  ))
}
# 5.3.1 Logistic Regression
# Predicted probability of the positive class for every test row.
prob <- predict(model_lr, newdata = test_df, type = "response")
# Classify at the conventional 0.5 cut-off. The previous 0.99 cut-off labelled
# only near-certain cases as "Purchased", which is what pushed recall down to
# about 12 %. Metrics recorded with the 0.99 cut-off must be regenerated.
lr_threshold <- 0.5
predict_lr <- factor(
  prob > lr_threshold,
  levels = c(FALSE, TRUE),
  labels = c('Not Purchased', 'Purchased')
)
computeClass(actual_purchase, predict_lr, 'Logistic Regression')
## ** Method Name: Logistic Regression
## Accuracy : 63.67%
## Precision: 100%
## Recall : 12.1%
## F1 Score : 21.58%
# 5.3.2 Decision Tree
# Predict the class of each test row with the fitted tree and report metrics.
predict_tree <- predict(model_ctree, newdata = test_df)
computeClass(
  actual = actual_purchase,
  prediction = predict_tree,
  model_name = "Decision Tree"
)
## ** Method Name: Decision Tree
## Accuracy : 88.33%
## Precision: 82.48%
## Recall : 91.13%
## F1 Score : 86.59%
# 5.3.3 Random Forest
# Predict the class of each test row with the fitted forest and report metrics.
predict_forest <- predict(model_forest, newdata = test_df)
computeClass(
  actual = actual_purchase,
  prediction = predict_forest,
  model_name = "Random Forest"
)
## ** Method Name: Random Forest
## Accuracy : 89%
## Precision: 85.27%
## Recall : 88.71%
## F1 Score : 86.96%
# 5.3.4 SVM
# Predict the class of each test row with the fitted SVM and report metrics.
predict_svm <- predict(model_svm, newdata = test_df)
computeClass(
  actual = actual_purchase,
  prediction = predict_svm,
  model_name = "SVM"
)
## ** Method Name: SVM
## Accuracy : 88.67%
## Precision: 84.62%
## Recall : 88.71%
## F1 Score : 86.61%
### 1. Prioritizing car deals to clients over the age of 52.
### 2. Targeting car sales to clients who earn $91500 or more.
### 3. Offering cars to clients between the ages of 44 to 52 with an average salary of over $91500.