Question 13: Logistic Regression and Classification on the Weekly Dataset

library(ISLR2)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
## 
##     Boston
library(class)
library(naivebayes)
## naivebayes 1.0.0 loaded
## For more information please visit:
## https://majkamichal.github.io/naivebayes/
library(corrplot)
## corrplot 0.95 loaded
library(e1071)

(a) Numerical and Graphical Summaries

data(Weekly)
summary(Weekly)
##       Year           Lag1               Lag2               Lag3         
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950  
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580  
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410  
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472  
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090  
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260  
##       Lag4               Lag5              Volume            Today         
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950  
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540  
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410  
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499  
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050  
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260  
##  Direction 
##  Down:484  
##  Up  :605  
##            
##            
##            
## 
pairs(Weekly)

str(Weekly)
## 'data.frame':    1089 obs. of  9 variables:
##  $ Year     : num  1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ Lag1     : num  0.816 -0.27 -2.576 3.514 0.712 ...
##  $ Lag2     : num  1.572 0.816 -0.27 -2.576 3.514 ...
##  $ Lag3     : num  -3.936 1.572 0.816 -0.27 -2.576 ...
##  $ Lag4     : num  -0.229 -3.936 1.572 0.816 -0.27 ...
##  $ Lag5     : num  -3.484 -0.229 -3.936 1.572 0.816 ...
##  $ Volume   : num  0.155 0.149 0.16 0.162 0.154 ...
##  $ Today    : num  -0.27 -2.576 3.514 0.712 1.178 ...
##  $ Direction: Factor w/ 2 levels "Down","Up": 1 1 2 2 2 1 2 2 2 1 ...
weekly_numeric <- Weekly[, sapply(Weekly, is.numeric)]  # Select numeric columns
cor_matrix <- cor(weekly_numeric)
print(cor_matrix)
##               Year         Lag1        Lag2        Lag3         Lag4
## Year    1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1   -0.03228927  1.000000000 -0.07485305  0.05863568 -0.071273876
## Lag2   -0.03339001 -0.074853051  1.00000000 -0.07572091  0.058381535
## Lag3   -0.03000649  0.058635682 -0.07572091  1.00000000 -0.075395865
## Lag4   -0.03112792 -0.071273876  0.05838153 -0.07539587  1.000000000
## Lag5   -0.03051910 -0.008183096 -0.07249948  0.06065717 -0.075675027
## Volume  0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today  -0.03245989 -0.075031842  0.05916672 -0.07124364 -0.007825873
##                Lag5      Volume        Today
## Year   -0.030519101  0.84194162 -0.032459894
## Lag1   -0.008183096 -0.06495131 -0.075031842
## Lag2   -0.072499482 -0.08551314  0.059166717
## Lag3    0.060657175 -0.06928771 -0.071243639
## Lag4   -0.075675027 -0.06107462 -0.007825873
## Lag5    1.000000000 -0.05851741  0.011012698
## Volume -0.058517414  1.00000000 -0.033077783
## Today   0.011012698 -0.03307778  1.000000000
attach(Weekly)
plot(Volume)

corrplot(cor(Weekly[, -9]), diag = FALSE)

The only strong relationship among the numeric variables is between Year and Volume (correlation of about 0.84): trading volume rises steadily over the years, while the lag variables show essentially no correlation with Today.

(b) Logistic Regression on Full Data

glm.fits <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Weekly, family = binomial)
summary(glm.fits)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Weekly)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181   
## Lag2         0.05844    0.02686   2.175   0.0296 * 
## Lag3        -0.01606    0.02666  -0.602   0.5469   
## Lag4        -0.02779    0.02646  -1.050   0.2937   
## Lag5        -0.01447    0.02638  -0.549   0.5833   
## Volume      -0.02274    0.03690  -0.616   0.5377   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
## 
## Number of Fisher Scoring iterations: 4
coef(glm.fits)
## (Intercept)        Lag1        Lag2        Lag3        Lag4        Lag5 
##  0.26686414 -0.04126894  0.05844168 -0.01606114 -0.02779021 -0.01447206 
##      Volume 
## -0.02274153
summary(glm.fits)$coef
##                Estimate Std. Error    z value    Pr(>|z|)
## (Intercept)  0.26686414 0.08592961  3.1056134 0.001898848
## Lag1        -0.04126894 0.02641026 -1.5626099 0.118144368
## Lag2         0.05844168 0.02686499  2.1753839 0.029601361
## Lag3        -0.01606114 0.02666299 -0.6023760 0.546923890
## Lag4        -0.02779021 0.02646332 -1.0501409 0.293653342
## Lag5        -0.01447206 0.02638478 -0.5485006 0.583348244
## Volume      -0.02274153 0.03689812 -0.6163330 0.537674762
Only Lag2 is statistically significant at the 5% level (p ≈ 0.030); none of the other lag variables or Volume shows clear evidence of association with Direction.

summary(glm.fits)$coef[, 4]
## (Intercept)        Lag1        Lag2        Lag3        Lag4        Lag5 
## 0.001898848 0.118144368 0.029601361 0.546923890 0.293653342 0.583348244 
##      Volume 
## 0.537674762
glm.probs <- predict(glm.fits, type = "response")
head(glm.probs, 8)
##         1         2         3         4         5         6         7         8 
## 0.6086249 0.6010314 0.5875699 0.4816416 0.6169013 0.5684190 0.5786097 0.5151972
contrasts(Direction)
##      Up
## Down  0
## Up    1
Since the contrasts show that Up is coded as 1, the fitted probabilities are P(Direction = Up); weeks with a fitted probability above 0.5 are classified as Up.

glm.pred <- rep("Down", 1089)
glm.pred[glm.probs > 0.5] <- "Up"

(c) Confusion Matrix and Accuracy

table(glm.pred, Direction)
##         Direction
## glm.pred Down  Up
##     Down   54  48
##       Up  430 557
(557 + 54) / 1089
## [1] 0.5610652
mean(glm.pred == Direction)
## [1] 0.5610652

Overall, logistic regression is correct on about 56.1% of weeks, but the confusion matrix shows it predicts Up almost all the time: it catches 557 of the 605 Up weeks while misclassifying 430 of the 484 Down weeks as Up.

(d) Logistic Regression with Training Data (1990-2008) and Testing (2009-2010)

# Define training set (Years 1990-2008)
train_index <- Weekly$Year < 2009

# Define test set
test_index <- !train_index

# Train logistic regression on Lag2 only
glm.fit2 <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train_index)

# Predict on test data
glm.probs2 <- predict(glm.fit2, Weekly[test_index, ], type = "response")

# Convert probabilities to class labels
glm.pred2 <- ifelse(glm.probs2 > 0.5, "Up", "Down")

# Confusion matrix for test data
conf_matrix2 <- table(glm.pred2, Weekly$Direction[test_index])
print(conf_matrix2)
##          
## glm.pred2 Down Up
##      Down    9  5
##      Up     34 56
# Compute test accuracy
test_accuracy <- mean(glm.pred2 == Weekly$Direction[test_index])
print(paste("Test Accuracy:", round(test_accuracy, 4)))
## [1] "Test Accuracy: 0.625"

(e) Linear Discriminant Analysis (LDA)

# Fit LDA model on training data
lda.fit <- lda(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
lda.pred <- predict(lda.fit, Weekly[test_index, ])
lda.class <- lda.pred$class

# Confusion matrix for LDA
conf_matrix_lda <- table(lda.class, Weekly$Direction[test_index])
print(conf_matrix_lda)
##          
## lda.class Down Up
##      Down    9  5
##      Up     34 56
# Compute test accuracy
lda_accuracy <- mean(lda.class == Weekly$Direction[test_index])
print(paste("LDA Test Accuracy:", round(lda_accuracy, 4)))
## [1] "LDA Test Accuracy: 0.625"

(f) Quadratic Discriminant Analysis (QDA)

# Fit QDA model on training data
qda.fit <- qda(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
qda.pred <- predict(qda.fit, Weekly[test_index, ])
qda.class <- qda.pred$class

# Confusion matrix for QDA
conf_matrix_qda <- table(qda.class, Weekly$Direction[test_index])
print(conf_matrix_qda)
##          
## qda.class Down Up
##      Down    0  0
##      Up     43 61
# Compute test accuracy
qda_accuracy <- mean(qda.class == Weekly$Direction[test_index])
print(paste("QDA Test Accuracy:", round(qda_accuracy, 4)))
## [1] "QDA Test Accuracy: 0.5865"

(g) K-Nearest Neighbors (KNN) with K = 1

# Prepare training and testing data for KNN
train_x <- Weekly[train_index, "Lag2", drop = FALSE]
test_x <- Weekly[test_index, "Lag2", drop = FALSE]

# Convert Direction to a binary class for KNN
train_y <- Weekly$Direction[train_index]
test_y <- Weekly$Direction[test_index]

# Fit KNN model with K=1
set.seed(1)
knn.pred <- knn(train_x, test_x, train_y, k = 1)

# Confusion matrix for KNN
conf_matrix_knn <- table(knn.pred, test_y)
print(conf_matrix_knn)
##         test_y
## knn.pred Down Up
##     Down   21 30
##     Up     22 31
# Compute test accuracy
knn_accuracy <- mean(knn.pred == test_y)
print(paste("KNN Test Accuracy (K=1):", round(knn_accuracy, 4)))
## [1] "KNN Test Accuracy (K=1): 0.5"

(h) Naive Bayes Classifier

# Fit Naive Bayes model
nb.fit <- naiveBayes(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
nb.pred <- predict(nb.fit, Weekly[test_index, ])

# Confusion matrix for Naive Bayes
conf_matrix_nb <- table(nb.pred, Weekly$Direction[test_index])
print(conf_matrix_nb)
##        
## nb.pred Down Up
##    Down    0  0
##    Up     43 61
# Compute test accuracy
nb_accuracy <- mean(nb.pred == Weekly$Direction[test_index])
print(paste("Naive Bayes Test Accuracy:", round(nb_accuracy, 4)))
## [1] "Naive Bayes Test Accuracy: 0.5865"

(i) Best Performing Method

Logistic regression (using Lag2 only) and LDA are tied for the best test accuracy at 62.5%. QDA and naive Bayes predict Up for every test week and therefore achieve only 58.7%, the proportion of Up weeks in the hold-out period, while KNN with K = 1 is no better than chance at 50%.

accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "QDA", "KNN (K=1)", "Naive Bayes"),
                               Accuracy = c(test_accuracy, lda_accuracy, qda_accuracy, knn_accuracy, nb_accuracy))
accuracy_results
##                Method  Accuracy
## 1 Logistic Regression 0.6250000
## 2                 LDA 0.6250000
## 3                 QDA 0.5865385
## 4           KNN (K=1) 0.5000000
## 5         Naive Bayes 0.5865385
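Accuracy alone hides how asymmetric the tied winners are: from the confusion matrix in part (d), the logistic model identifies 56 of the 61 Up weeks but only 9 of the 43 Down weeks. A minimal sketch (not part of the original output) that computes these class-specific rates from conf_matrix2:

# Class-specific rates for the logistic model from part (d)
# (rows of conf_matrix2 are predicted classes, columns are actual classes)
sensitivity <- conf_matrix2["Up", "Up"] / sum(conf_matrix2[, "Up"])        # true positive rate for Up weeks
specificity <- conf_matrix2["Down", "Down"] / sum(conf_matrix2[, "Down"])  # true negative rate for Down weeks
c(sensitivity = sensitivity, specificity = specificity)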

(j) Experimenting with Different Predictors and Transformations

glm.fit3 <- glm(Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, data = Weekly, 
                family = binomial, subset = train_index)

summary(glm.fit3)
## 
## Call:
## glm(formula = Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, family = binomial, 
##     data = Weekly, subset = train_index)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.287097   0.091910   3.124  0.00179 **
## Lag2         0.054652   0.030130   1.814  0.06970 . 
## Lag1        -0.061893   0.029276  -2.114  0.03451 * 
## I(Lag2^2)    0.006756   0.004718   1.432  0.15216   
## Volume      -0.090157   0.054444  -1.656  0.09773 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1354.7  on 984  degrees of freedom
## Residual deviance: 1343.0  on 980  degrees of freedom
## AIC: 1353
## 
## Number of Fisher Scoring iterations: 4
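Part (j) also calls for comparing such variations on the held-out data. The sketch below (assuming the 2009-2010 split from part (d); its results are not part of the original output) scores the expanded logistic model on the test weeks and tries a few neighborhood sizes for KNN on Lag2:

# Score the expanded logistic model on the 2009-2010 test weeks
glm.probs3 <- predict(glm.fit3, Weekly[test_index, ], type = "response")
glm.pred3 <- ifelse(glm.probs3 > 0.5, "Up", "Down")
table(glm.pred3, Weekly$Direction[test_index])
mean(glm.pred3 == Weekly$Direction[test_index])

# Try several values of K for KNN on Lag2
set.seed(1)
sapply(c(1, 5, 15, 30), function(k) {
  pred <- knn(train_x, test_x, train_y, k = k)
  mean(pred == test_y)
})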

Question 14: Predicting High vs. Low Gas Mileage in the Auto Dataset

# Load dataset
data(Auto)

(a) Create Binary mpg01 Variable

Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
summary(Auto$mpg01)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.5     0.5     1.0     1.0
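Since mpg01 is binary, a frequency table (a small check not in the original output) confirms that the median split is balanced:

table(Auto$mpg01)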

(b) Graphical Exploration

Boxplots of each candidate predictor grouped by mpg01 show that high-mileage cars (mpg01 = 1) tend to have fewer cylinders, less horsepower, and lower weight, so these three features look most useful.

boxplot(Auto$cylinders ~ Auto$mpg01, main = "Cylinders vs mpg01")

boxplot(Auto$horsepower ~ Auto$mpg01, main = "Horsepower vs mpg01")

boxplot(Auto$weight ~ Auto$mpg01, main = "Weight vs mpg01")

pairs(Auto)

(c) Split data into training and test sets

set.seed(42)
train_idx <- sample(1:nrow(Auto), 0.7 * nrow(Auto))
train_data <- Auto[train_idx, ]
test_data <- Auto[-train_idx, ]

(d) Linear Discriminant Analysis (LDA)

lda_model1 <- lda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_lda <- predict(lda_model1, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$mpg01)
conf_matrix_lda
##          Actual
## Predicted  0  1
##         0 43  5
##         1  4 66
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.9237288

(e) Quadratic Discriminant Analysis (QDA)

qda_model1 <- qda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_qda <- predict(qda_model1, test_data)
conf_matrix_qda <- table(Predicted = pred_qda$class, Actual = test_data$mpg01)
conf_matrix_qda
##          Actual
## Predicted  0  1
##         0 43  6
##         1  4 65
accuracy_qda <- sum(diag(conf_matrix_qda)) / sum(conf_matrix_qda)
accuracy_qda
## [1] 0.9152542

(f) Logistic Regression

logistic_model <- glm(mpg01 ~ cylinders + horsepower + weight, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$mpg01)
conf_matrix_logistic
##          Actual
## Predicted  0  1
##         0 44  5
##         1  3 66
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.9322034

(g) Naive Bayes Classifier

# Convert mpg01 to factor (required for Naive Bayes)
train_data$mpg01 <- as.factor(train_data$mpg01)
test_data$mpg01 <- as.factor(test_data$mpg01)

# Train the Naive Bayes model
nb_model1 <- naive_bayes(mpg01 ~ cylinders + horsepower + weight, data = train_data)

# Predict on test data (passing only the model's predictors avoids a
# harmless predict.naive_bayes() warning about extra columns in newdata)
pred_nb <- predict(nb_model1, test_data[, c("cylinders", "horsepower", "weight")])
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$mpg01)
conf_matrix_nb
##          Actual
## Predicted  0  1
##         0 43  5
##         1  4 66
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.9237288

(h) K-Nearest Neighbors (KNN) with Different K Values

k_train <- train_data[, c("cylinders", "horsepower", "weight")]
k_test <- test_data[, c("cylinders", "horsepower", "weight")]
k_train_labels <- train_data$mpg01

set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())

for (k in k_values) {
  knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
  conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$mpg01)
  accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
  k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
##    K  Accuracy
## 1  1 0.8389831
## 2  3 0.8813559
## 3  5 0.8559322
## 4  7 0.8389831
## 5 10 0.8474576
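The three predictors sit on very different scales (weight is in the thousands while cylinders is in single digits), so weight dominates the Euclidean distances used by KNN. A sketch (not part of the original run, and its accuracy is not reported above) of standardizing with the training-set statistics before refitting:

# Standardize using training-set means and standard deviations, then refit KNN
k_train_std <- scale(k_train)
k_test_std <- scale(k_test,
                    center = attr(k_train_std, "scaled:center"),
                    scale = attr(k_train_std, "scaled:scale"))
set.seed(42)
knn_pred_std <- knn(k_train_std, k_test_std, k_train_labels, k = 3)
mean(knn_pred_std == test_data$mpg01)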

Best Performing Model Summary

accuracy_results <- data.frame(Method = c("LDA", "QDA", "Logistic Regression", "Naive Bayes", "Best KNN"),
                               Accuracy = c(accuracy_lda, accuracy_qda, accuracy_logistic, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
##                Method  Accuracy
## 1                 LDA 0.9237288
## 2                 QDA 0.9152542
## 3 Logistic Regression 0.9322034
## 4         Naive Bayes 0.9237288
## 5            Best KNN 0.8813559
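The exercise asks for test errors, which are simply one minus these accuracies; a one-line conversion (not in the original output) makes explicit that logistic regression has the lowest test error here, roughly 6.8%:

transform(accuracy_results, TestError = 1 - Accuracy)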

Question 16: Predicting Crime Rate in the Boston Dataset

(a) Create Binary Crime Rate Variable

data(Boston)
Boston$crime01 <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
summary(Boston$crime01)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.5     0.5     1.0     1.0

(b) Graphical Exploration

Boxplots of each predictor grouped by the crime indicator show that high-crime tracts tend to be closer to employment centers (lower dis) and have a larger lower-status population (higher lstat), while average room counts separate the two groups less clearly.

boxplot(Boston$rm ~ Boston$crime01, main = "Rooms vs Crime Indicator")

boxplot(Boston$dis ~ Boston$crime01, main = "Distance to Employment Centers vs Crime Indicator")

boxplot(Boston$lstat ~ Boston$crime01, main = "Lower Status Population vs Crime Indicator")

pairs(Boston[, c("crime01", "rm", "dis", "lstat", "age", "tax")])

(c) Split Data into Training and Test Sets

set.seed(42)
train_idx <- sample(1:nrow(Boston), 0.7 * nrow(Boston))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, ]

(d) Logistic Regression

logistic_model <- glm(crime01 ~ rm + dis + lstat + tax, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$crime01)
conf_matrix_logistic
##          Actual
## Predicted  0  1
##         0 72 18
##         1  6 56
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.8421053

(e) Linear Discriminant Analysis (LDA)

lda_model2 <- lda(crime01 ~ rm + dis + lstat + tax, data = train_data)
pred_lda <- predict(lda_model2, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$crime01)
conf_matrix_lda
##          Actual
## Predicted  0  1
##         0 73 19
##         1  5 55
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.8421053

(f) Naive Bayes Classifier

# Convert crime01 to factor (required for Naive Bayes)
train_data$crime01 <- as.factor(train_data$crime01)
test_data$crime01 <- as.factor(test_data$crime01)

# Train the Naive Bayes model
nb_model2 <- naive_bayes(crime01 ~ rm + dis + lstat + tax, data = train_data)

# Predict on test data (passing only the model's predictors avoids a
# harmless predict.naive_bayes() warning about extra columns in newdata)
pred_nb <- predict(nb_model2, test_data[, c("rm", "dis", "lstat", "tax")], type = "class")
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$crime01)
conf_matrix_nb
##          Actual
## Predicted  0  1
##         0 73 17
##         1  5 57
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.8552632

(g) K-Nearest Neighbors (KNN) with Different K Values

k_train <- train_data[, c("rm", "dis", "lstat", "tax")]
k_test <- test_data[, c("rm", "dis", "lstat", "tax")]
k_train_labels <- train_data$crime01

set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())

for (k in k_values) {
  knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
  conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$crime01)
  accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
  k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
##    K  Accuracy
## 1  1 0.9736842
## 2  3 0.9671053
## 3  5 0.9473684
## 4  7 0.9473684
## 5 10 0.9210526

Best Performing Model Summary

accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "Naive Bayes", "Best KNN"),
                               Accuracy = c(accuracy_logistic, accuracy_lda, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
##                Method  Accuracy
## 1 Logistic Regression 0.8421053
## 2                 LDA 0.8421053
## 3         Naive Bayes 0.8552632
## 4            Best KNN 0.9736842
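KNN with K = 1 clearly dominates on this split (about 97% test accuracy), which suggests a strongly non-linear boundary between high- and low-crime tracts in these predictors. For symmetry with Question 14, a QDA fit on the same split could also be compared; a sketch (its accuracy is not part of the original output):

# QDA on the same predictors and train/test split
qda_model_boston <- qda(crime01 ~ rm + dis + lstat + tax, data = train_data)
pred_qda_boston <- predict(qda_model_boston, test_data)$class
mean(pred_qda_boston == test_data$crime01)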