library(ISLR2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
library(class)
library(naivebayes)
## naivebayes 1.0.0 loaded
## For more information please visit:
## https://majkamichal.github.io/naivebayes/
library(corrplot)
## corrplot 0.95 loaded
library(e1071)
data(Weekly)
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
pairs(Weekly)
str(Weekly)
## 'data.frame': 1089 obs. of 9 variables:
## $ Year : num 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
## $ Lag1 : num 0.816 -0.27 -2.576 3.514 0.712 ...
## $ Lag2 : num 1.572 0.816 -0.27 -2.576 3.514 ...
## $ Lag3 : num -3.936 1.572 0.816 -0.27 -2.576 ...
## $ Lag4 : num -0.229 -3.936 1.572 0.816 -0.27 ...
## $ Lag5 : num -3.484 -0.229 -3.936 1.572 0.816 ...
## $ Volume : num 0.155 0.149 0.16 0.162 0.154 ...
## $ Today : num -0.27 -2.576 3.514 0.712 1.178 ...
## $ Direction: Factor w/ 2 levels "Down","Up": 1 1 2 2 2 1 2 2 2 1 ...
weekly_numeric <- Weekly[, sapply(Weekly, is.numeric)] # Select numeric columns
cor_matrix <- cor(weekly_numeric)
print(cor_matrix)
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
attach(Weekly)
plot(Volume)
corrplot(cor(Weekly[, -9]), diag = FALSE)
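Apart from the strong Year-Volume relationship visible above (roughly 0.84), the pairwise correlations are all small; one way to confirm the largest off-diagonal entry programmatically:
cc <- cor_matrix
diag(cc) <- 0                                   # ignore the trivial self-correlations
which(abs(cc) == max(abs(cc)), arr.ind = TRUE)  # picks out the Year-Volume pair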
glm.fits <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Weekly, family = binomial)
summary(glm.fits)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
coef(glm.fits)
## (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5
## 0.26686414 -0.04126894 0.05844168 -0.01606114 -0.02779021 -0.01447206
## Volume
## -0.02274153
summary(glm.fits)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686414 0.08592961 3.1056134 0.001898848
## Lag1 -0.04126894 0.02641026 -1.5626099 0.118144368
## Lag2 0.05844168 0.02686499 2.1753839 0.029601361
## Lag3 -0.01606114 0.02666299 -0.6023760 0.546923890
## Lag4 -0.02779021 0.02646332 -1.0501409 0.293653342
## Lag5 -0.01447206 0.02638478 -0.5485006 0.583348244
## Volume -0.02274153 0.03689812 -0.6163330 0.537674762
summary(glm.fits)$coef[, 4]
## (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5
## 0.001898848 0.118144368 0.029601361 0.546923890 0.293653342 0.583348244
## Volume
## 0.537674762
glm.probs <- predict(glm.fits, type = "response")
head(glm.probs)
##         1         2         3         4         5         6 
## 0.6086249 0.6010314 0.5875699 0.4816416 0.6169013 0.5684190
contrasts(Direction)
## Up
## Down 0
## Up 1
glm.pred <- rep("Down", 1089)
glm.pred[glm.probs > .5] <- "Up"
table(glm.pred, Direction)
##         Direction
## glm.pred Down  Up
##     Down   54  48
##     Up    430 557
(557 + 54) / 1089
## [1] 0.5610652
mean(glm.pred == Direction)
## [1] 0.5610652
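For reference, this 0.561 training accuracy is only slightly better than always predicting "Up"; a quick check of that baseline:
# 605 of the 1089 weeks are "Up" (see summary(Weekly)), so this is about 0.556
mean(Direction == "Up")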
# Define training set (Years 1990-2008)
train_index <- Weekly$Year < 2009
# Define test set
test_index <- !train_index
# Train logistic regression on Lag2 only
glm.fit2 <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train_index)
# Predict on test data
glm.probs2 <- predict(glm.fit2, Weekly[test_index, ], type = "response")
# Convert probabilities to class labels
glm.pred2 <- ifelse(glm.probs2 > 0.5, "Up", "Down")
# Confusion matrix for test data
conf_matrix2 <- table(glm.pred2, Weekly$Direction[test_index])
print(conf_matrix2)
##
## glm.pred2 Down Up
## Down 9 5
## Up 34 56
# Compute test accuracy
test_accuracy <- mean(glm.pred2 == Weekly$Direction[test_index])
print(paste("Test Accuracy:", round(test_accuracy, 4)))
## [1] "Test Accuracy: 0.625"
# Fit LDA model on training data
lda.fit <- lda(Direction ~ Lag2, data = Weekly, subset = train_index)
# Predict on test data
lda.pred <- predict(lda.fit, Weekly[test_index, ])
lda.class <- lda.pred$class
# Confusion matrix for LDA
conf_matrix_lda <- table(lda.class, Weekly$Direction[test_index])
print(conf_matrix_lda)
##
## lda.class Down Up
## Down 9 5
## Up 34 56
# Compute test accuracy
lda_accuracy <- mean(lda.class == Weekly$Direction[test_index])
print(paste("LDA Test Accuracy:", round(lda_accuracy, 4)))
## [1] "LDA Test Accuracy: 0.625"
# Fit QDA model on training data
qda.fit <- qda(Direction ~ Lag2, data = Weekly, subset = train_index)
# Predict on test data
qda.pred <- predict(qda.fit, Weekly[test_index, ])
qda.class <- qda.pred$class
# Confusion matrix for QDA
conf_matrix_qda <- table(qda.class, Weekly$Direction[test_index])
print(conf_matrix_qda)
##
## qda.class Down Up
## Down 0 0
## Up 43 61
# Compute test accuracy
qda_accuracy <- mean(qda.class == Weekly$Direction[test_index])
print(paste("QDA Test Accuracy:", round(qda_accuracy, 4)))
## [1] "QDA Test Accuracy: 0.5865"
# Prepare training and testing data for KNN
train_x <- Weekly[train_index, "Lag2", drop = FALSE]
test_x <- Weekly[test_index, "Lag2", drop = FALSE]
# Convert Direction to a binary class for KNN
train_y <- Weekly$Direction[train_index]
test_y <- Weekly$Direction[test_index]
# Fit KNN model with K=1
set.seed(1)
knn.pred <- knn(train_x, test_x, train_y, k = 1)
# Confusion matrix for KNN
conf_matrix_knn <- table(knn.pred, test_y)
print(conf_matrix_knn)
## test_y
## knn.pred Down Up
## Down 21 30
## Up 22 31
# Compute test accuracy
knn_accuracy <- mean(knn.pred == test_y)
print(paste("KNN Test Accuracy (K=1):", round(knn_accuracy, 4)))
## [1] "KNN Test Accuracy (K=1): 0.5"
# Fit Naive Bayes model
nb.fit <- naiveBayes(Direction ~ Lag2, data = Weekly, subset = train_index)
# Predict on test data
nb.pred <- predict(nb.fit, Weekly[test_index, ])
# Confusion matrix for Naive Bayes
conf_matrix_nb <- table(nb.pred, Weekly$Direction[test_index])
print(conf_matrix_nb)
##
## nb.pred Down Up
## Down 0 0
## Up 43 61
# Compute test accuracy
nb_accuracy <- mean(nb.pred == Weekly$Direction[test_index])
print(paste("Naive Bayes Test Accuracy:", round(nb_accuracy, 4)))
## [1] "Naive Bayes Test Accuracy: 0.5865"
Logistic regression and LDA are the best performers on the 2009-2010 hold-out data, each reaching 62.5% accuracy; QDA and naive Bayes never predict "Down" and so simply match the test-set "Up" rate, while KNN with K = 1 does no better than a coin flip.
accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "QDA", "KNN (K=1)", "Naive Bayes"),
Accuracy = c(test_accuracy, lda_accuracy, qda_accuracy, knn_accuracy, nb_accuracy))
accuracy_results
## Method Accuracy
## 1 Logistic Regression 0.6250000
## 2 LDA 0.6250000
## 3 QDA 0.5865385
## 4 KNN (K=1) 0.5000000
## 5 Naive Bayes 0.5865385
glm.fit3 <- glm(Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, data = Weekly,
family = binomial, subset = train_index)
summary(glm.fit3)
##
## Call:
## glm(formula = Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, family = binomial,
## data = Weekly, subset = train_index)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.287097 0.091910 3.124 0.00179 **
## Lag2 0.054652 0.030130 1.814 0.06970 .
## Lag1 -0.061893 0.029276 -2.114 0.03451 *
## I(Lag2^2) 0.006756 0.004718 1.432 0.15216
## Volume -0.090157 0.054444 -1.656 0.09773 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1343.0 on 980 degrees of freedom
## AIC: 1353
##
## Number of Fisher Scoring iterations: 4
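The expanded model can be evaluated on the same 2009-2010 hold-out set with the earlier workflow (a sketch, keeping the 0.5 threshold used above):
# Hold-out performance of the model with Lag1, Lag2, Lag2^2 and Volume
glm.probs3 <- predict(glm.fit3, Weekly[test_index, ], type = "response")
glm.pred3 <- ifelse(glm.probs3 > 0.5, "Up", "Down")
table(glm.pred3, Weekly$Direction[test_index])
mean(glm.pred3 == Weekly$Direction[test_index])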
# Load dataset
data(Auto)
Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
summary(Auto$mpg01)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.5 0.5 1.0 1.0
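Before the plots, a quick numerical look at which variables track mpg01 can guide the choice of predictors (a minimal sketch; the factor column name is dropped):
# Correlations of the numeric columns with the new mpg01 indicator
auto_numeric <- Auto[, sapply(Auto, is.numeric)]
round(sort(cor(auto_numeric)[, "mpg01"]), 2)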
boxplot(Auto$mpg ~ Auto$cylinders, main="MPG vs Cylinders")
boxplot(Auto$horsepower ~ Auto$mpg01, main = "Horsepower vs mpg01")
boxplot(Auto$weight ~ Auto$mpg01, main = "Weight vs mpg01")
pairs(Auto)
set.seed(42)
train_idx <- sample(1:nrow(Auto), 0.7 * nrow(Auto))
train_data <- Auto[train_idx, ]
test_data <- Auto[-train_idx, ]
lda_model1 <- lda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_lda <- predict(lda_model1, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$mpg01)
conf_matrix_lda
## Actual
## Predicted 0 1
## 0 43 5
## 1 4 66
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.9237288
qda_model1 <- qda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_qda <- predict(qda_model1, test_data)
conf_matrix_qda <- table(Predicted = pred_qda$class, Actual = test_data$mpg01)
conf_matrix_qda
## Actual
## Predicted 0 1
## 0 43 6
## 1 4 65
accuracy_qda <- sum(diag(conf_matrix_qda)) / sum(conf_matrix_qda)
accuracy_qda
## [1] 0.9152542
logistic_model <- glm(mpg01 ~ cylinders + horsepower + weight, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$mpg01)
conf_matrix_logistic
## Actual
## Predicted 0 1
## 0 44 5
## 1 3 66
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.9322034
# Convert mpg01 to factor (required for Naive Bayes)
train_data$mpg01 <- as.factor(train_data$mpg01)
test_data$mpg01 <- as.factor(test_data$mpg01)
# Train the Naive Bayes model
nb_model1 <- naive_bayes(mpg01 ~ cylinders + horsepower + weight, data = train_data)
# Predict on test data
pred_nb <- predict(nb_model1, test_data)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$mpg01)
conf_matrix_nb
## Actual
## Predicted 0 1
## 0 43 5
## 1 4 66
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.9237288
k_train <- train_data[, c("cylinders", "horsepower", "weight")]
k_test <- test_data[, c("cylinders", "horsepower", "weight")]
k_train_labels <- train_data$mpg01
set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())
for (k in k_values) {
knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$mpg01)
accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
## K Accuracy
## 1 1 0.8389831
## 2 3 0.8813559
## 3 5 0.8559322
## 4 7 0.8389831
## 5 10 0.8474576
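KNN is distance-based, so the unscaled weight variable dominates the distances above; a minimal sketch of repeating the search with predictors standardized using training-set means and standard deviations:
# Standardize both sets with the training-set statistics, then re-run KNN
train_means <- colMeans(k_train)
train_sds <- apply(k_train, 2, sd)
k_train_sc <- scale(k_train, center = train_means, scale = train_sds)
k_test_sc <- scale(k_test, center = train_means, scale = train_sds)
set.seed(42)
for (k in k_values) {
  pred_sc <- knn(k_train_sc, k_test_sc, k_train_labels, k = k)
  cat("K =", k, "accuracy =", round(mean(pred_sc == test_data$mpg01), 4), "\n")
}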
accuracy_results <- data.frame(Method = c("LDA", "QDA", "Logistic Regression", "Naive Bayes", "Best KNN"),
Accuracy = c(accuracy_lda, accuracy_qda, accuracy_logistic, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
## Method Accuracy
## 1 LDA 0.9237288
## 2 QDA 0.9152542
## 3 Logistic Regression 0.9322034
## 4 Naive Bayes 0.9237288
## 5 Best KNN 0.8813559
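Test error rates (one minus the accuracies above) may be easier to compare directly; a one-line conversion:
# Test error rate = 1 - accuracy for each method
transform(accuracy_results, TestError = 1 - Accuracy)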
data(Boston)
Boston$crime01 <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
summary(Boston$crime01)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 0.5 0.5 1.0 1.0
boxplot(Boston$rm ~ Boston$crime01, main = "Rooms vs Crime Level")
boxplot(Boston$dis ~ Boston$crime01, main = "Distance to Employment Centers vs Crime Level")
boxplot(Boston$lstat ~ Boston$crime01, main = "Lower Status Population vs Crime Level")
pairs(Boston[, c("crime01", "rm", "dis", "lstat", "age", "tax")])
set.seed(42)
train_idx <- sample(1:nrow(Boston), 0.7 * nrow(Boston))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, ]
logistic_model <- glm(crime01 ~ rm + dis + lstat + tax, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$crime01)
conf_matrix_logistic
## Actual
## Predicted 0 1
## 0 72 18
## 1 6 56
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.8421053
lda_model2 <- lda(crime01 ~ rm + dis + lstat + tax, data = train_data)
pred_lda <- predict(lda_model2, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$crime01)
conf_matrix_lda
## Actual
## Predicted 0 1
## 0 73 19
## 1 5 55
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.8421053
# Convert crime01 to factor (required for Naive Bayes)
train_data$crime01 <- as.factor(train_data$crime01)
test_data$crime01 <- as.factor(test_data$crime01)
# Train the Naive Bayes model
nb_model2 <- naive_bayes(crime01 ~ rm + dis + lstat + tax, data = train_data)
# Predict on test data
pred_nb <- predict(nb_model2, test_data, type = "class")
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$crime01)
conf_matrix_nb
## Actual
## Predicted 0 1
## 0 73 17
## 1 5 57
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.8552632
k_train <- train_data[, c("rm", "dis", "lstat", "tax")]
k_test <- test_data[, c("rm", "dis", "lstat", "tax")]
k_train_labels <- train_data$crime01
set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())
for (k in k_values) {
knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$crime01)
accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
## K Accuracy
## 1 1 0.9736842
## 2 3 0.9671053
## 3 5 0.9473684
## 4 7 0.9473684
## 5 10 0.9210526
accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "Naive Bayes", "Best KNN"),
Accuracy = c(accuracy_logistic, accuracy_lda, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
## Method Accuracy
## 1 Logistic Regression 0.8421053
## 2 LDA 0.8421053
## 3 Naive Bayes 0.8552632
## 4 Best KNN 0.9736842
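Other subsets of the predictors could be explored as well; a minimal sketch using nox, rad, and age instead (an arbitrary alternative chosen purely for illustration, not a tuned model):
# Logistic regression on a different, illustrative predictor subset
logit_alt <- glm(crime01 ~ nox + rad + age, data = train_data, family = binomial)
pred_alt <- ifelse(predict(logit_alt, test_data, type = "response") > 0.5, "1", "0")
mean(pred_alt == test_data$crime01)   # test accuracy for this alternative subset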