Question 13: Logistic Regression and Classification on the Weekly Dataset

library(ISLR2)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
## 
##     Boston
library(class)
library(naivebayes)
## naivebayes 1.0.0 loaded
## For more information please visit:
## https://majkamichal.github.io/naivebayes/
library(corrplot)
## corrplot 0.95 loaded
library(e1071)

(a) Numerical and Graphical Summaries

data(Weekly)
summary(Weekly)
##       Year           Lag1               Lag2               Lag3         
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950  
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580  
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410  
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472  
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090  
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260  
##       Lag4               Lag5              Volume            Today         
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950  
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540  
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410  
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499  
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050  
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260  
##  Direction 
##  Down:484  
##  Up  :605  
##            
##            
##            
## 
pairs(Weekly)

str(Weekly)
## 'data.frame':    1089 obs. of  9 variables:
##  $ Year     : num  1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ Lag1     : num  0.816 -0.27 -2.576 3.514 0.712 ...
##  $ Lag2     : num  1.572 0.816 -0.27 -2.576 3.514 ...
##  $ Lag3     : num  -3.936 1.572 0.816 -0.27 -2.576 ...
##  $ Lag4     : num  -0.229 -3.936 1.572 0.816 -0.27 ...
##  $ Lag5     : num  -3.484 -0.229 -3.936 1.572 0.816 ...
##  $ Volume   : num  0.155 0.149 0.16 0.162 0.154 ...
##  $ Today    : num  -0.27 -2.576 3.514 0.712 1.178 ...
##  $ Direction: Factor w/ 2 levels "Down","Up": 1 1 2 2 2 1 2 2 2 1 ...
weekly_numeric <- Weekly[, sapply(Weekly, is.numeric)]  # Select numeric columns
cor_matrix <- cor(weekly_numeric)
print(cor_matrix)
##               Year         Lag1        Lag2        Lag3         Lag4
## Year    1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1   -0.03228927  1.000000000 -0.07485305  0.05863568 -0.071273876
## Lag2   -0.03339001 -0.074853051  1.00000000 -0.07572091  0.058381535
## Lag3   -0.03000649  0.058635682 -0.07572091  1.00000000 -0.075395865
## Lag4   -0.03112792 -0.071273876  0.05838153 -0.07539587  1.000000000
## Lag5   -0.03051910 -0.008183096 -0.07249948  0.06065717 -0.075675027
## Volume  0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today  -0.03245989 -0.075031842  0.05916672 -0.07124364 -0.007825873
##                Lag5      Volume        Today
## Year   -0.030519101  0.84194162 -0.032459894
## Lag1   -0.008183096 -0.06495131 -0.075031842
## Lag2   -0.072499482 -0.08551314  0.059166717
## Lag3    0.060657175 -0.06928771 -0.071243639
## Lag4   -0.075675027 -0.06107462 -0.007825873
## Lag5    1.000000000 -0.05851741  0.011012698
## Volume -0.058517414  1.00000000 -0.033077783
## Today   0.011012698 -0.03307778  1.000000000
attach(Weekly)
plot(Volume)

corrplot(cor(Weekly[, -9]), diag = FALSE)

The only strong relationship among the numeric variables is between Year and Volume (correlation of about 0.84): trading volume rises steadily over the years, while the lag variables show essentially no correlation with Today.

(b) Logistic Regression on Full Data

glm.fits <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Weekly, family = binomial)
summary(glm.fits)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Weekly)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181   
## Lag2         0.05844    0.02686   2.175   0.0296 * 
## Lag3        -0.01606    0.02666  -0.602   0.5469   
## Lag4        -0.02779    0.02646  -1.050   0.2937   
## Lag5        -0.01447    0.02638  -0.549   0.5833   
## Volume      -0.02274    0.03690  -0.616   0.5377   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
## 
## Number of Fisher Scoring iterations: 4
coef(glm.fits)
## (Intercept)        Lag1        Lag2        Lag3        Lag4        Lag5 
##  0.26686414 -0.04126894  0.05844168 -0.01606114 -0.02779021 -0.01447206 
##      Volume 
## -0.02274153
summary(glm.fits)$coef
##                Estimate Std. Error    z value    Pr(>|z|)
## (Intercept)  0.26686414 0.08592961  3.1056134 0.001898848
## Lag1        -0.04126894 0.02641026 -1.5626099 0.118144368
## Lag2         0.05844168 0.02686499  2.1753839 0.029601361
## Lag3        -0.01606114 0.02666299 -0.6023760 0.546923890
## Lag4        -0.02779021 0.02646332 -1.0501409 0.293653342
## Lag5        -0.01447206 0.02638478 -0.5485006 0.583348244
## Volume      -0.02274153 0.03689812 -0.6163330 0.537674762
Only Lag2 is statistically significant at the 5% level (p ≈ 0.030); none of the other lag variables or Volume shows clear evidence of association with Direction.

summary(glm.fits)$coef[, 4]
## (Intercept)        Lag1        Lag2        Lag3        Lag4        Lag5 
## 0.001898848 0.118144368 0.029601361 0.546923890 0.293653342 0.583348244 
##      Volume 
## 0.537674762
glm.probs <- predict(glm.fits, type = "response")
head(glm.probs, 8)
##         1         2         3         4         5         6         7         8 
## 0.6086249 0.6010314 0.5875699 0.4816416 0.6169013 0.5684190 0.5786097 0.5151972
contrasts(Direction)
##      Up
## Down  0
## Up    1
Since the contrasts show that Up is coded as 1, the fitted probabilities are P(Direction = Up); weeks with a fitted probability above 0.5 are classified as Up.

glm.pred <- rep("Down", 1089)
glm.pred[glm.probs > 0.5] <- "Up"

(c) Confusion Matrix and Accuracy

table(glm.pred, Direction)
##         Direction
## glm.pred Down  Up
##     Down   54  48
##       Up  430 557
(557 + 54) / 1089
## [1] 0.5610652
mean(glm.pred == Direction)
## [1] 0.5610652

Overall, logistic regression is correct on about 56.1% of weeks, but the confusion matrix shows it predicts Up almost all the time: it catches 557 of the 605 Up weeks while misclassifying 430 of the 484 Down weeks as Up.

(d) Logistic Regression with Training Data (1990-2008) and Testing (2009-2010)

# Define training set (Years 1990-2008)
train_index <- Weekly$Year < 2009

# Define test set
test_index <- !train_index

# Train logistic regression on Lag2 only
glm.fit2 <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train_index)

# Predict on test data
glm.probs2 <- predict(glm.fit2, Weekly[test_index, ], type = "response")

# Convert probabilities to class labels
glm.pred2 <- ifelse(glm.probs2 > 0.5, "Up", "Down")

# Confusion matrix for test data
conf_matrix2 <- table(glm.pred2, Weekly$Direction[test_index])
print(conf_matrix2)
##          
## glm.pred2 Down Up
##      Down    9  5
##      Up     34 56
# Compute test accuracy
test_accuracy <- mean(glm.pred2 == Weekly$Direction[test_index])
print(paste("Test Accuracy:", round(test_accuracy, 4)))
## [1] "Test Accuracy: 0.625"

(e) Linear Discriminant Analysis (LDA)

# Fit LDA model on training data
lda.fit <- lda(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
lda.pred <- predict(lda.fit, Weekly[test_index, ])
lda.class <- lda.pred$class

# Confusion matrix for LDA
conf_matrix_lda <- table(lda.class, Weekly$Direction[test_index])
print(conf_matrix_lda)
##          
## lda.class Down Up
##      Down    9  5
##      Up     34 56
# Compute test accuracy
lda_accuracy <- mean(lda.class == Weekly$Direction[test_index])
print(paste("LDA Test Accuracy:", round(lda_accuracy, 4)))
## [1] "LDA Test Accuracy: 0.625"

(f) Quadratic Discriminant Analysis (QDA)

# Fit QDA model on training data
qda.fit <- qda(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
qda.pred <- predict(qda.fit, Weekly[test_index, ])
qda.class <- qda.pred$class

# Confusion matrix for QDA
conf_matrix_qda <- table(qda.class, Weekly$Direction[test_index])
print(conf_matrix_qda)
##          
## qda.class Down Up
##      Down    0  0
##      Up     43 61
# Compute test accuracy
qda_accuracy <- mean(qda.class == Weekly$Direction[test_index])
print(paste("QDA Test Accuracy:", round(qda_accuracy, 4)))
## [1] "QDA Test Accuracy: 0.5865"

(g) K-Nearest Neighbors (KNN) with K = 1

# Prepare training and testing data for KNN
train_x <- Weekly[train_index, "Lag2", drop = FALSE]
test_x <- Weekly[test_index, "Lag2", drop = FALSE]

# Convert Direction to a binary class for KNN
train_y <- Weekly$Direction[train_index]
test_y <- Weekly$Direction[test_index]

# Fit KNN model with K=1
set.seed(1)
knn.pred <- knn(train_x, test_x, train_y, k = 1)

# Confusion matrix for KNN
conf_matrix_knn <- table(knn.pred, test_y)
print(conf_matrix_knn)
##         test_y
## knn.pred Down Up
##     Down   21 30
##     Up     22 31
# Compute test accuracy
knn_accuracy <- mean(knn.pred == test_y)
print(paste("KNN Test Accuracy (K=1):", round(knn_accuracy, 4)))
## [1] "KNN Test Accuracy (K=1): 0.5"

(h) Naive Bayes Classifier

# Fit Naive Bayes model
nb.fit <- naiveBayes(Direction ~ Lag2, data = Weekly, subset = train_index)

# Predict on test data
nb.pred <- predict(nb.fit, Weekly[test_index, ])

# Confusion matrix for Naive Bayes
conf_matrix_nb <- table(nb.pred, Weekly$Direction[test_index])
print(conf_matrix_nb)
##        
## nb.pred Down Up
##    Down    0  0
##    Up     43 61
# Compute test accuracy
nb_accuracy <- mean(nb.pred == Weekly$Direction[test_index])
print(paste("Naive Bayes Test Accuracy:", round(nb_accuracy, 4)))
## [1] "Naive Bayes Test Accuracy: 0.5865"

(i) Best Performing Method

Logistic regression (using Lag2 only) and LDA are tied for the best test accuracy at 62.5%. QDA and naive Bayes predict Up for every test week and therefore achieve only 58.7%, the proportion of Up weeks in the hold-out period, while KNN with K = 1 is no better than chance at 50%.

accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "QDA", "KNN (K=1)", "Naive Bayes"),
                               Accuracy = c(test_accuracy, lda_accuracy, qda_accuracy, knn_accuracy, nb_accuracy))
accuracy_results
##                Method  Accuracy
## 1 Logistic Regression 0.6250000
## 2                 LDA 0.6250000
## 3                 QDA 0.5865385
## 4           KNN (K=1) 0.5000000
## 5         Naive Bayes 0.5865385
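Accuracy alone hides how asymmetric the tied winners are: from the confusion matrix in part (d), the logistic model identifies 56 of the 61 Up weeks but only 9 of the 43 Down weeks. A minimal sketch (not part of the original output) that computes these class-specific rates from conf_matrix2:

# Class-specific rates for the logistic model from part (d)
# (rows of conf_matrix2 are predicted classes, columns are actual classes)
sensitivity <- conf_matrix2["Up", "Up"] / sum(conf_matrix2[, "Up"])        # true positive rate for Up weeks
specificity <- conf_matrix2["Down", "Down"] / sum(conf_matrix2[, "Down"])  # true negative rate for Down weeks
c(sensitivity = sensitivity, specificity = specificity)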

(j) Experimenting with Different Predictors and Transformations

glm.fit3 <- glm(Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, data = Weekly, 
                family = binomial, subset = train_index)

summary(glm.fit3)
## 
## Call:
## glm(formula = Direction ~ Lag2 + Lag1 + I(Lag2^2) + Volume, family = binomial, 
##     data = Weekly, subset = train_index)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.287097   0.091910   3.124  0.00179 **
## Lag2         0.054652   0.030130   1.814  0.06970 . 
## Lag1        -0.061893   0.029276  -2.114  0.03451 * 
## I(Lag2^2)    0.006756   0.004718   1.432  0.15216   
## Volume      -0.090157   0.054444  -1.656  0.09773 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1354.7  on 984  degrees of freedom
## Residual deviance: 1343.0  on 980  degrees of freedom
## AIC: 1353
## 
## Number of Fisher Scoring iterations: 4
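Part (j) also calls for comparing such variations on the held-out data. The sketch below (assuming the 2009-2010 split from part (d); its results are not part of the original output) scores the expanded logistic model on the test weeks and tries a few neighborhood sizes for KNN on Lag2:

# Score the expanded logistic model on the 2009-2010 test weeks
glm.probs3 <- predict(glm.fit3, Weekly[test_index, ], type = "response")
glm.pred3 <- ifelse(glm.probs3 > 0.5, "Up", "Down")
table(glm.pred3, Weekly$Direction[test_index])
mean(glm.pred3 == Weekly$Direction[test_index])

# Try several values of K for KNN on Lag2
set.seed(1)
sapply(c(1, 5, 15, 30), function(k) {
  pred <- knn(train_x, test_x, train_y, k = k)
  mean(pred == test_y)
})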

Question 14: Predicting High vs. Low Gas Mileage in the Auto Dataset

# Load dataset
data(Auto)

(a) Create Binary mpg01 Variable

Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
summary(Auto$mpg01)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.5     0.5     1.0     1.0
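Since mpg01 is binary, a frequency table (a small check not in the original output) confirms that the median split is balanced:

table(Auto$mpg01)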

(b) Graphical Exploration

Boxplots of each candidate predictor grouped by mpg01 show that high-mileage cars (mpg01 = 1) tend to have fewer cylinders, less horsepower, and lower weight, so these three features look most useful.

boxplot(Auto$cylinders ~ Auto$mpg01, main = "Cylinders vs mpg01")

boxplot(Auto$horsepower ~ Auto$mpg01, main = "Horsepower vs mpg01")

boxplot(Auto$weight ~ Auto$mpg01, main = "Weight vs mpg01")

pairs(Auto)

(c) Split data into training and test sets

set.seed(42)
train_idx <- sample(1:nrow(Auto), 0.7 * nrow(Auto))
train_data <- Auto[train_idx, ]
test_data <- Auto[-train_idx, ]

(d) Linear Discriminant Analysis (LDA)

lda_model1 <- lda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_lda <- predict(lda_model1, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$mpg01)
conf_matrix_lda
##          Actual
## Predicted  0  1
##         0 43  5
##         1  4 66
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.9237288

(e) Quadratic Discriminant Analysis (QDA)

qda_model1 <- qda(mpg01 ~ cylinders + horsepower + weight, data = train_data)
pred_qda <- predict(qda_model1, test_data)
conf_matrix_qda <- table(Predicted = pred_qda$class, Actual = test_data$mpg01)
conf_matrix_qda
##          Actual
## Predicted  0  1
##         0 43  6
##         1  4 65
accuracy_qda <- sum(diag(conf_matrix_qda)) / sum(conf_matrix_qda)
accuracy_qda
## [1] 0.9152542

(f) Logistic Regression

logistic_model <- glm(mpg01 ~ cylinders + horsepower + weight, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$mpg01)
conf_matrix_logistic
##          Actual
## Predicted  0  1
##         0 44  5
##         1  3 66
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.9322034

(g) Naive Bayes Classifier

# Convert mpg01 to factor (required for Naive Bayes)
train_data$mpg01 <- as.factor(train_data$mpg01)
test_data$mpg01 <- as.factor(test_data$mpg01)

# Train the Naive Bayes model
nb_model1 <- naive_bayes(mpg01 ~ cylinders + horsepower + weight, data = train_data)

# Predict on test data (passing only the model's predictors avoids a
# harmless predict.naive_bayes() warning about extra columns in newdata)
pred_nb <- predict(nb_model1, test_data[, c("cylinders", "horsepower", "weight")])
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$mpg01)
conf_matrix_nb
##          Actual
## Predicted  0  1
##         0 43  5
##         1  4 66
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.9237288

(h) K-Nearest Neighbors (KNN) with Different K Values

k_train <- train_data[, c("cylinders", "horsepower", "weight")]
k_test <- test_data[, c("cylinders", "horsepower", "weight")]
k_train_labels <- train_data$mpg01

set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())

for (k in k_values) {
  knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
  conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$mpg01)
  accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
  k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
##    K  Accuracy
## 1  1 0.8389831
## 2  3 0.8813559
## 3  5 0.8559322
## 4  7 0.8389831
## 5 10 0.8474576
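The three predictors sit on very different scales (weight is in the thousands while cylinders is in single digits), so weight dominates the Euclidean distances used by KNN. A sketch (not part of the original run, and its accuracy is not reported above) of standardizing with the training-set statistics before refitting:

# Standardize using training-set means and standard deviations, then refit KNN
k_train_std <- scale(k_train)
k_test_std <- scale(k_test,
                    center = attr(k_train_std, "scaled:center"),
                    scale = attr(k_train_std, "scaled:scale"))
set.seed(42)
knn_pred_std <- knn(k_train_std, k_test_std, k_train_labels, k = 3)
mean(knn_pred_std == test_data$mpg01)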

Best Performing Model Summary

accuracy_results <- data.frame(Method = c("LDA", "QDA", "Logistic Regression", "Naive Bayes", "Best KNN"),
                               Accuracy = c(accuracy_lda, accuracy_qda, accuracy_logistic, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
##                Method  Accuracy
## 1                 LDA 0.9237288
## 2                 QDA 0.9152542
## 3 Logistic Regression 0.9322034
## 4         Naive Bayes 0.9237288
## 5            Best KNN 0.8813559
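The exercise asks for test errors, which are simply one minus these accuracies; a one-line conversion (not in the original output) makes explicit that logistic regression has the lowest test error here, roughly 6.8%:

transform(accuracy_results, TestError = 1 - Accuracy)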

Question 16: Predicting Crime Rate in the Boston Dataset

(a) Create Binary Crime Rate Variable

data(Boston)
Boston$crime01 <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
summary(Boston$crime01)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.5     0.5     1.0     1.0

(b) Graphical Exploration

Boxplots of each predictor grouped by the crime indicator show that high-crime tracts tend to be closer to employment centers (lower dis) and have a larger lower-status population (higher lstat), while average room counts separate the two groups less clearly.

boxplot(Boston$rm ~ Boston$crime01, main = "Rooms vs Crime Indicator")

boxplot(Boston$dis ~ Boston$crime01, main = "Distance to Employment Centers vs Crime Indicator")

boxplot(Boston$lstat ~ Boston$crime01, main = "Lower Status Population vs Crime Indicator")

pairs(Boston[, c("crime01", "rm", "dis", "lstat", "age", "tax")])

(c) Split Data into Training and Test Sets

set.seed(42)
train_idx <- sample(1:nrow(Boston), 0.7 * nrow(Boston))
train_data <- Boston[train_idx, ]
test_data <- Boston[-train_idx, ]

(d) Logistic Regression

logistic_model <- glm(crime01 ~ rm + dis + lstat + tax, data = train_data, family = binomial)
pred_logistic <- predict(logistic_model, test_data, type = "response")
pred_class_logistic <- ifelse(pred_logistic > 0.5, 1, 0)
conf_matrix_logistic <- table(Predicted = pred_class_logistic, Actual = test_data$crime01)
conf_matrix_logistic
##          Actual
## Predicted  0  1
##         0 72 18
##         1  6 56
accuracy_logistic <- sum(diag(conf_matrix_logistic)) / sum(conf_matrix_logistic)
accuracy_logistic
## [1] 0.8421053

(e) Linear Discriminant Analysis (LDA)

lda_model2 <- lda(crime01 ~ rm + dis + lstat + tax, data = train_data)
pred_lda <- predict(lda_model2, test_data)
conf_matrix_lda <- table(Predicted = pred_lda$class, Actual = test_data$crime01)
conf_matrix_lda
##          Actual
## Predicted  0  1
##         0 73 19
##         1  5 55
accuracy_lda <- sum(diag(conf_matrix_lda)) / sum(conf_matrix_lda)
accuracy_lda
## [1] 0.8421053

(f) Naive Bayes Classifier

# Convert crime01 to factor (required for Naive Bayes)
train_data$crime01 <- as.factor(train_data$crime01)
test_data$crime01 <- as.factor(test_data$crime01)

# Train the Naive Bayes model
nb_model2 <- naive_bayes(crime01 ~ rm + dis + lstat + tax, data = train_data)

# Predict on test data (passing only the model's predictors avoids a
# harmless predict.naive_bayes() warning about extra columns in newdata)
pred_nb <- predict(nb_model2, test_data[, c("rm", "dis", "lstat", "tax")], type = "class")
# Create confusion matrix
conf_matrix_nb <- table(Predicted = pred_nb, Actual = test_data$crime01)
conf_matrix_nb
##          Actual
## Predicted  0  1
##         0 73 17
##         1  5 57
# Calculate accuracy
accuracy_nb <- sum(diag(conf_matrix_nb)) / sum(conf_matrix_nb)
accuracy_nb
## [1] 0.8552632

(g) K-Nearest Neighbors (KNN) with Different K Values

k_train <- train_data[, c("rm", "dis", "lstat", "tax")]
k_test <- test_data[, c("rm", "dis", "lstat", "tax")]
k_train_labels <- train_data$crime01

set.seed(42)
k_values <- c(1, 3, 5, 7, 10)
k_results <- data.frame(K = integer(), Accuracy = numeric())

for (k in k_values) {
  knn_pred <- knn(as.matrix(k_train), as.matrix(k_test), k_train_labels, k = k)
  conf_matrix_knn <- table(Predicted = knn_pred, Actual = test_data$crime01)
  accuracy_knn <- sum(diag(conf_matrix_knn)) / sum(conf_matrix_knn)
  k_results <- rbind(k_results, data.frame(K = k, Accuracy = accuracy_knn))
}
k_results
##    K  Accuracy
## 1  1 0.9736842
## 2  3 0.9671053
## 3  5 0.9473684
## 4  7 0.9473684
## 5 10 0.9210526

Best Performing Model Summary

accuracy_results <- data.frame(Method = c("Logistic Regression", "LDA", "Naive Bayes", "Best KNN"),
                               Accuracy = c(accuracy_logistic, accuracy_lda, accuracy_nb, max(k_results$Accuracy)))
accuracy_results
##                Method  Accuracy
## 1 Logistic Regression 0.8421053
## 2                 LDA 0.8421053
## 3         Naive Bayes 0.8552632
## 4            Best KNN 0.9736842
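KNN with K = 1 clearly dominates on this split (about 97% test accuracy), which suggests a strongly non-linear boundary between high- and low-crime tracts in these predictors. For symmetry with Question 14, a QDA fit on the same split could also be compared; a sketch (its accuracy is not part of the original output):

# QDA on the same predictors and train/test split
qda_model_boston <- qda(crime01 ~ rm + dis + lstat + tax, data = train_data)
pred_qda_boston <- predict(qda_model_boston, test_data)$class
mean(pred_qda_boston == test_data$crime01)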