Part 1.

Step 1. Load data

data("marketing", package = "datarium")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ModelMetrics)
## 
## Attaching package: 'ModelMetrics'
## The following objects are masked from 'package:caret':
## 
##     confusionMatrix, precision, recall, sensitivity, specificity
## The following object is masked from 'package:base':
## 
##     kappa

Step 2. Inspect data

# Peek at three randomly chosen rows of `marketing`. No seed is set before
# this call, so the rows shown differ between runs.
# slice_sample() is the current dplyr verb; sample_n() is superseded.
slice_sample(marketing, n = 3)
##   youtube facebook newspaper sales
## 1   80.28    14.04     44.16 11.64
## 2  287.76     4.92     44.28 14.76
## 3  273.60    45.24     38.40 25.80

**Sales distribution**

# Histogram of `sales` drawn on the density scale, with a kernel density
# curve laid over it.
# - `after_stat(density)` replaces the `..density..` dot-dot notation, which
#   was deprecated in ggplot2 3.4.0 and raised a lifecycle warning.
# - The curve colour is a fixed value, so it is set outside `aes()`. Inside
#   `aes()` the string "red" is treated as data and mapped through the colour
#   scale, which is why the legend previously had to be suppressed.
p <- ggplot(marketing, aes(x = sales)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, fill = "grey", color = "black"
  ) +
  geom_density(color = "red")

p + theme_bw()

Step 3. Scaling techniques

# Z-score standardisation: every column is centred and scaled, then the
# transformed data frame is printed in full.
# NOTE(review): the centring/scaling statistics are estimated on the complete
# `marketing` data (the response `sales` included), i.e. before the
# train/test split performed in Step 5 — confirm this is intended.
preproc1 <- preProcess(marketing, method = c("center", "scale"))
norm1 <- predict(preproc1, newdata = marketing)
print(norm1)
##         youtube     facebook    newspaper        sales
## 1    0.96742460  0.979065591  1.774492530  1.548168135
## 2   -1.19437904  1.080097401  0.667902716 -0.694303815
## 3   -1.51235985  1.524637364  1.779084189 -0.905134512
## 4    0.05191939  1.214806480  1.283185019  0.858176766
## 5    0.39319551 -0.839506984  1.278593360 -0.215143142
## 6   -1.61136487  1.726700983  2.040808751 -1.307629477
## 7   -1.04295960  0.642292892 -0.323895625 -0.425973838
## 8   -0.31265202 -0.246787034 -0.870303044 -0.157643861
## 9   -1.61252963 -1.425491481 -1.357018896 -1.767623723
## 10   0.61450084 -1.391814211 -0.429503781 -0.655970962
## 11  -0.94278982 -1.176279684 -0.291754012 -1.039299500
## 12   0.78805080  0.049572941 -1.219269126  0.647346069
## 13  -1.43548537  0.797208333  1.622967784 -0.924300938
## 14  -0.57705364 -1.055041512 -1.072336039 -0.828468804
## 15   0.66458573  0.649028346  0.709227646  0.954008900
## 16   0.56325118  1.645875535  1.026052116  1.605667416
## 17  -0.92298882  0.898240143  3.831555755 -0.291808850
## 18   1.56494899  1.100303763  1.159210227  1.988995954
## 19  -0.90668211 -0.186167948 -0.562661892 -0.521805973
## 20   0.00299927  0.042837487 -0.525928620  0.110686115
## 21   0.83114711  0.298784739  1.049010411  0.762344631
## 22   1.05245243 -1.223427861 -0.323895625 -0.291808850
## 23  -1.55895045 -0.495998831  0.874527370 -1.614292308
## 24   0.94645883 -0.428644291 -0.199920832  0.283183958
## 25  -0.98705089 -0.718268813 -0.562661892 -0.828468804
## 26   1.34946748 -1.331195125 -0.507561984 -0.387640985
## 27  -0.04825039  0.406552002 -0.824386454  0.187351823
## 28   1.08390109 -0.442115199 -0.351445579  0.359849666
## 29   1.18523563  0.258372015 -0.351445579  0.934842473
## 30  -0.89037540 -0.489263377  0.470461379 -0.675137388
## 31   1.69889695  0.339197463  0.580661195  1.414003146
## 32  -0.39767985 -0.394967022  0.369444882 -0.406807411
## 33  -0.58054794 -1.465904205 -0.025437791 -0.847635231
## 34   1.38091613 -0.219845218 -1.389160509  0.647346069
## 35  -0.59801941 -1.472639659 -1.063152721 -0.866801658
## 36   1.67327212 -1.290782401 -1.012644472 -0.234309569
## 37   1.39605808  1.383192830 -1.173352536  2.180660223
## 38  -0.84262004  1.760378253  0.695452669  0.129852542
## 39  -1.21068574  0.231430199  0.208736817 -0.751803096
## 40   0.94296453  0.972330137  0.066395389  1.433169573
## 41   0.64594949 -0.064929776  0.048028753  0.494014654
## 42   0.34893444  0.682705616  0.374036541  0.589846789
## 43   1.70705030  0.298784739 -1.320285624  1.279838158
## 44   0.69719914 -1.001157880 -0.190737514 -0.215143142
## 45  -1.42034342  0.164075659  0.585252854 -1.058465927
## 46   0.32680391 -0.051458868  0.043437094  0.168185396
## 47  -0.66790531 -0.900126070  0.236286771 -0.655970962
## 48   1.08157156  1.228277388 -0.553478574  1.758998831
## 49   0.93364642 -0.502734285  0.888302347  0.149018969
## 50  -0.93347170 -0.778887899  0.286795020 -0.828468804
## 51   0.61450084 -1.358136941  0.185778522 -0.502639546
## 52  -0.54327546 -0.920332432 -1.237635762 -0.636804535
## 53   0.80785181  1.241748296  0.415361472  1.644000269
## 54   0.41416128  1.544843726  1.292368337  1.375670293
## 55   1.34713795  0.372874732 -0.672861707  1.184006023
## 56   0.60401795  1.760378253  1.352059904  1.854830966
## 57  -1.62767157  0.325726555  0.498011333 -1.633458735
## 58  -0.12628963 -0.273728850 -0.640720094 -0.157643861
## 59   0.74262497  1.773849161  0.328119951  1.873997393
## 60   0.74146021  0.420022910 -0.975911200  0.839010339
## 61  -1.08955020 -1.432226935 -0.420320463 -1.135131635
## 62   1.33083124  1.309102836  1.108701978  1.950663100
## 63   1.07458297 -0.522940647 -0.149412583  0.321516812
## 64  -0.51648587  0.426758364 -1.017236131 -0.004312446
## 65  -0.18569264  1.315838290 -0.075946040  0.762344631
## 66  -0.90901164 -0.940538794 -1.361610555 -0.905134512
## 67  -1.34579847  0.089985665 -1.301918988 -0.866801658
## 68  -0.09018192 -0.590295187 -0.934586269 -0.119311008
## 69   1.05245243  0.285313831 -0.897852997  0.934842473
## 70   0.81251087  1.389928284 -0.154004242  1.586500989
## 71   0.60634748  0.494112904  0.374036541  0.819843912
## 72  -0.43378756 -0.603766095  0.052620412 -0.310975277
## 73  -1.40054242  0.655763800 -0.516745302 -1.000966646
## 74  -0.20549365 -1.183015138  0.034253776 -0.579305254
## 75   0.77290886  0.089985665 -0.801428159  0.570680362
## 76  -1.51585415  1.376457376  2.702007645 -1.020133073
## 77  -1.39238907 -1.459168751 -0.452462076 -1.365128758
## 78  -0.30915772  0.352668371 -0.750919910  0.034020408
## 79  -1.64980211  0.446964726 -0.971319541 -1.671791589
## 80  -0.36157214 -1.048306058 -0.342262261 -0.579305254
## 81  -0.82281904  0.231430199 -0.378995532 -0.425973838
## 82   1.08040679 -1.290782401  0.291386679 -0.330141704
## 83  -0.83563145 -0.199638856  0.089353684 -0.521805973
## 84  -0.91600023  1.430341008  0.231695112 -0.080978154
## 85   0.77407363  1.329309198  0.149045251  1.471502427
## 86   0.53762635 -0.327612482  1.613784466  0.225684677
## 87  -0.82398380  0.285313831 -0.668270048 -0.387640985
## 88  -0.42330468  1.167658302  1.498992991  0.379016092
## 89  -0.68421201  0.150604751  1.967342208 -0.215143142
## 90  -0.43378756  1.652610989  0.957177231  0.513181081
## 91  -0.14842017 -1.236898769 -0.975911200 -0.540972400
## 92  -1.37957665 -1.465904205  0.112311979 -1.288463050
## 93   0.82299375  0.689441070  1.306143314  1.030674608
## 94   1.20969569  0.891504689  1.916833959  1.567334562
## 95  -0.46174192 -0.623972457 -0.902444656 -0.483473119
## 96   0.18936165  0.561467444  1.026052116  0.551513935
## 97   0.58887601 -1.331195125 -1.132027606 -0.445140265
## 98   0.44095087 -0.152490678 -0.392770509  0.283183958
## 99   1.66162447  1.282161020  0.947993914  2.180660223
## 100 -0.13793728  1.241748296  0.704635987  0.609013216
## 101  0.87773770 -1.277311493  0.883710688 -0.445140265
## 102  1.73966372  0.878033781  3.230048428  1.873997393
## 103  1.55097181 -0.886655162 -0.420320463  0.149018969
## 104  0.47589381 -0.408437930 -0.581028528  0.129852542
## 105  1.06177055  0.743324702 -1.159577559  1.279838158
## 106 -0.10648863  1.558314633  1.306143314  0.992341754
## 107 -1.42150819 -0.826036076 -0.039212768 -1.307629477
## 108 -0.65975195 -1.546729653 -0.337670602 -1.020133073
## 109 -1.56011521 -1.539994199 -0.227470786 -1.671791589
## 110  1.26211011  0.244901107 -1.150394241  1.107340316
## 111  0.91733971 -1.014628788  1.191351840 -0.119311008
## 112  1.10253732  0.992536499 -0.337670602  1.490668854
## 113  0.33379250 -0.529676101 -1.292735670  0.014853981
## 114  0.72864780 -0.179432494 -0.911627974  0.359849666
## 115 -0.80185327  1.585256449  0.181186863  0.110686115
## 116 -0.83796098  0.790472879  1.016868798 -0.272642423
## 117 -0.09134669 -0.603766095 -0.227470786 -0.349308131
## 118 -0.82281904 -1.513052383 -0.723369956 -0.885968085
## 119 -0.24858995  0.918446505  2.233658429  0.359849666
## 120 -1.48673502 -0.489263377 -0.378995532 -1.422628038
## 121 -0.06688662  0.238165653  0.718410964  0.283183958
## 122 -1.49372361 -0.105342500  0.911260642 -1.345962331
## 123  0.89637394 -1.405285119 -0.686636684 -0.464306692
## 124 -0.27887383  0.763531064 -0.833569772  0.225684677
## 125  0.96043601  0.608615622  2.004075480  1.088173889
## 126 -0.69702443 -0.772152445 -0.213695809 -0.655970962
## 127 -1.62184775  1.053155585  0.920443960 -1.422628038
## 128 -0.77855797 -1.566936015 -0.980502859 -1.000966646
## 129  0.85327764  1.733436437 -1.256002398  2.046495235
## 130 -1.01849954 -0.758681537  0.576069536 -0.828468804
## 131 -1.70454606  1.100303763 -1.003461154 -2.380949385
## 132  1.37625707 -1.371607849  0.571477877 -0.253475996
## 133 -1.61485916  0.265107469 -1.306510647 -1.595125881
## 134  0.84745381  0.689441070  0.667902716  1.069007462
## 135 -1.28290117  1.032949223  1.609192807 -0.617638108
## 136 -1.15011797  1.598727357 -1.012644472 -0.464306692
## 137 -1.41451960  1.059891039 -0.975911200 -0.866801658
## 138  1.47526209  0.379610186  1.338284927  1.299004585
## 139 -1.21185051  0.177546567 -0.461645394 -0.847635231
## 140  0.44095087  1.389928284 -1.324877283  1.279838158
## 141 -0.85776198 -0.421908837 -0.810611477 -0.598471681
## 142  0.54345018  0.817414695  2.068358705  0.992341754
## 143  0.85560717  0.669234708  0.337303269  1.164839596
## 144 -0.49435534 -1.183015138  0.176595204 -0.694303815
## 145 -0.59219559 -0.570088825  0.383219859 -0.502639546
## 146 -0.07853427 -1.438962389 -0.989686177 -0.713470242
## 147  1.08390109 -1.075247874 -1.003461154 -0.157643861
## 148  1.12000880  1.733436437  0.631169444  2.180660223
## 149 -1.27008875  1.147451941 -0.856528067 -0.598471681
## 150 -1.19204951  0.170811113 -0.457053735 -0.751803096
## 151  1.55679563 -0.630707911  0.295978338  0.398182519
## 152 -0.30333390 -1.001157880  0.833202439 -0.464306692
## 153  0.58887601  0.002424763 -0.750919910  0.494014654
## 154  0.28254284  1.107039217  0.328119951  0.954008900
## 155  0.47472905 -0.145755224 -0.966727882  0.302350385
## 156 -1.66494405 -0.785623353 -1.141210924 -2.074286554
## 157 -0.61898518  1.362986468  0.915852301  0.244851104
## 158  0.03211839 -1.479375113 -0.287162353 -0.751803096
## 159 -1.57642192  0.918446505  0.672494375 -1.288463050
## 160 -0.17870405 -0.327612482  0.185778522 -0.215143142
## 161  0.29652002 -0.347818844  0.006703822  0.072353262
## 162 -0.71449590  0.844356511  0.860752393 -0.138477435
## 163  0.48171764 -0.347818844 -0.227470786  0.168185396
## 164  0.19169118  0.911711051 -1.063152721  0.762344631
## 165 -0.34759496 -0.576824279 -1.154985900 -0.406807411
## 166  1.01867425 -1.337930579  2.490791332 -0.406807411
## 167 -1.50420650  0.965594683 -0.411137145 -1.154298062
## 168  0.69603438 -1.216692407 -0.512153643 -0.349308131
## 169  0.79620416  0.022631125  1.241860088  0.589846789
## 170  1.59872717 -0.852977892 -1.109069311  0.187351823
## 171 -1.13031697 -0.785623353 -0.558070233 -1.077632354
## 172  0.20333883 -0.159226132  0.773510872  0.091519689
## 173 -1.48440549 -0.213109764 -0.622353458 -1.230963769
## 174  0.24876466 -1.088718782 -0.815203136 -0.445140265
## 175  0.87773770 -1.337930579 -0.801428159 -0.483473119
## 176  1.51253457  1.726700983  0.516377969  2.487323054
## 177  1.18057657  0.467171088 -0.470828712  1.184006023
## 178  0.26973043 -1.041570604  0.213328476 -0.445140265
## 179  1.51020504 -1.412020573 -0.314712307 -0.425973838
## 180  0.21615124 -0.893390616 -0.594803505 -0.272642423
## 181  0.11132240 -1.391814211 -1.021827790 -0.675137388
## 182  0.83231187 -1.203221500 -0.144820924 -0.349308131
## 183 -1.05810154 -1.183015138 -0.039212768 -1.020133073
## 184  1.63716441  1.329309198  1.893875664  2.333991639
## 185  1.24347388 -0.132284316 -0.025437791  0.685678923
## 186  0.67506861  1.470753732 -0.502970325  1.644000269
## 187 -0.08785239 -1.425491481 -0.181554196 -0.713470242
## 188  0.51316629  0.366139279 -0.567253551  0.628179642
## 189  1.61852817 -0.630707911 -1.233044103  0.359849666
## 190 -1.49488838 -0.751946083 -0.328487284 -1.403461612
## 191 -1.25261728  1.201335572 -1.136619265 -0.617638108
## 192 -0.83330192 -0.839506984 -1.127435947 -0.790135950
## 193 -1.51235985 -1.290782401  0.048028753 -1.556793027
## 194  0.23012842  1.261954658 -1.237635762  1.069007462
## 195  0.03095363  0.830885603 -1.127435947  0.628179642
## 196 -1.26775922 -1.317724217 -0.769286546 -1.230963769
## 197 -0.61549089 -1.236898769 -1.031011108 -0.828468804
## 198  0.34893444 -0.940538794 -1.109069311 -0.234309569
## 199  1.59057381  1.261954658  1.636742761  2.199826650
## 200  0.99071990 -0.987686972 -1.003461154 -0.119311008
# Min-max normalisation: every column is rescaled with caret's "range"
# method, then the transformed data frame is printed in full.
preproc2 <- preProcess(marketing, method = "range")
norm2 <- predict(preproc2, newdata = marketing)
print(norm2)
##        youtube    facebook   newspaper      sales
## 1   0.77578627 0.762096774 0.605980651 0.80708661
## 2   0.14812310 0.792338710 0.394019349 0.34645669
## 3   0.05579980 0.925403226 0.606860158 0.30314961
## 4   0.50997633 0.832661290 0.511873351 0.66535433
## 5   0.60906324 0.217741935 0.510993843 0.44488189
## 6   0.02705445 0.985887097 0.656992084 0.22047244
## 7   0.19208657 0.661290323 0.204045734 0.40157480
## 8   0.40412580 0.395161290 0.099384345 0.45669291
## 9   0.02671627 0.042338710 0.006156552 0.12598425
## 10  0.67331755 0.052419355 0.183817062 0.35433071
## 11  0.22117010 0.116935484 0.210202287 0.27559055
## 12  0.72370646 0.483870968 0.032541777 0.62204724
## 13  0.07811972 0.707661290 0.576956904 0.29921260
## 14  0.32735881 0.153225806 0.060686016 0.31889764
## 15  0.68785932 0.663306452 0.401934916 0.68503937
## 16  0.65843761 0.961693548 0.462620932 0.81889764
## 17  0.22691917 0.737903226 1.000000000 0.42913386
## 18  0.94927291 0.798387097 0.488126649 0.89763780
## 19  0.23165370 0.413306452 0.158311346 0.38188976
## 20  0.49577274 0.481854839 0.165347405 0.51181102
## 21  0.73621914 0.558467742 0.467018470 0.64566929
## 22  0.80047345 0.102822581 0.204045734 0.42913386
## 23  0.04227257 0.320564516 0.433597186 0.15748031
## 24  0.76969902 0.340725806 0.227792436 0.54724409
## 25  0.20831924 0.254032258 0.158311346 0.31889764
## 26  0.88670950 0.070564516 0.168865435 0.40944882
## 27  0.48089280 0.590725806 0.108179420 0.52755906
## 28  0.80960433 0.336693548 0.198768690 0.56299213
## 29  0.83902604 0.546370968 0.198768690 0.68110236
## 30  0.23638823 0.322580645 0.356200528 0.35039370
## 31  0.98816368 0.570564516 0.377308707 0.77952756
## 32  0.37943862 0.350806452 0.336851363 0.40551181
## 33  0.32634427 0.030241935 0.261213720 0.31496063
## 34  0.89584038 0.403225806 0.000000000 0.62204724
## 35  0.32127156 0.028225806 0.062445031 0.31102362
## 36  0.98072371 0.082661290 0.072119613 0.44094488
## 37  0.90023673 0.883064516 0.041336851 0.93700787
## 38  0.25025364 0.995967742 0.399296394 0.51574803
## 39  0.14338857 0.538306452 0.306068602 0.33464567
## 40  0.76868448 0.760080645 0.278803870 0.78346457
## 41  0.68244843 0.449596774 0.275285840 0.59055118
## 42  0.59621238 0.673387097 0.337730871 0.61023622
## 43  0.99053094 0.558467742 0.013192612 0.75196850
## 44  0.69732837 0.169354839 0.229551451 0.44488189
## 45  0.08251606 0.518145161 0.378188215 0.27165354
## 46  0.58978695 0.453629032 0.274406332 0.52362205
## 47  0.30098072 0.199596774 0.311345646 0.35433071
## 48  0.80892797 0.836693548 0.160070361 0.85039370
## 49  0.76597903 0.318548387 0.436235708 0.51968504
## 50  0.22387555 0.235887097 0.321020229 0.31889764
## 51  0.67331755 0.062500000 0.301671064 0.38582677
## 52  0.33716605 0.193548387 0.029023747 0.35826772
## 53  0.72945553 0.840725806 0.345646438 0.82677165
## 54  0.61515049 0.931451613 0.513632366 0.77165354
## 55  0.88603314 0.580645161 0.137203166 0.73228346
## 56  0.67027393 0.995967742 0.525065963 0.87007874
## 57  0.02231992 0.566532258 0.361477573 0.15354331
## 58  0.45823470 0.387096774 0.143359719 0.45669291
## 59  0.71051742 1.000000000 0.328935796 0.87401575
## 60  0.71017924 0.594758065 0.079155673 0.66141732
## 61  0.17855935 0.040322581 0.185576077 0.25590551
## 62  0.88129861 0.860887097 0.478452067 0.88976378
## 63  0.80689888 0.312500000 0.237467018 0.55511811
## 64  0.34494420 0.596774194 0.071240106 0.48818898
## 65  0.44098749 0.862903226 0.251539138 0.64566929
## 66  0.23097734 0.187500000 0.005277045 0.30314961
## 67  0.10415962 0.495967742 0.016710642 0.31102362
## 68  0.46871830 0.292338710 0.087071240 0.46456693
## 69  0.80047345 0.554435484 0.094107300 0.68110236
## 70  0.73080825 0.885080645 0.236587511 0.81496063
## 71  0.67095029 0.616935484 0.337730871 0.65748031
## 72  0.36895502 0.288306452 0.276165347 0.42519685
## 73  0.08826513 0.665322581 0.167106420 0.28346457
## 74  0.43523842 0.114919355 0.272647318 0.37007874
## 75  0.71931011 0.495967742 0.112576957 0.60629921
## 76  0.05478526 0.881048387 0.783641161 0.27952756
## 77  0.09063240 0.032258065 0.179419525 0.20866142
## 78  0.40514034 0.574596774 0.122251539 0.49606299
## 79  0.01589449 0.602822581 0.080035180 0.14566929
## 80  0.38992222 0.155241935 0.200527704 0.37007874
## 81  0.25600271 0.538306452 0.193491645 0.40157480
## 82  0.80858979 0.082661290 0.321899736 0.42125984
## 83  0.25228272 0.409274194 0.283201407 0.38188976
## 84  0.22894826 0.897177419 0.310466139 0.47244094
## 85  0.71964829 0.866935484 0.294635004 0.79133858
## 86  0.65099763 0.370967742 0.575197889 0.53543307
## 87  0.25566452 0.554435484 0.138082674 0.40944882
## 88  0.37199865 0.818548387 0.553210202 0.56692913
## 89  0.29624620 0.514112903 0.642919965 0.44488189
## 90  0.36895502 0.963709677 0.449428320 0.59448819
## 91  0.45180927 0.098790323 0.079155673 0.37795276
## 92  0.09435238 0.030241935 0.287598945 0.22440945
## 93  0.73385188 0.675403226 0.516270888 0.70078740
## 94  0.84612783 0.735887097 0.633245383 0.81102362
## 95  0.36083869 0.282258065 0.093227792 0.38976378
## 96  0.54988164 0.637096774 0.462620932 0.60236220
## 97  0.66587758 0.070564516 0.049252419 0.39763780
## 98  0.62292864 0.423387097 0.190853122 0.54724409
## 99  0.97734190 0.852822581 0.447669305 0.93700787
## 100 0.45485289 0.840725806 0.401055409 0.61417323
## 101 0.74974636 0.086693548 0.435356201 0.39763780
## 102 1.00000000 0.731854839 0.884784521 0.87401575
## 103 0.94521474 0.203629032 0.185576077 0.51968504
## 104 0.63307406 0.346774194 0.154793316 0.51574803
## 105 0.80317890 0.691532258 0.043975374 0.75196850
## 106 0.46398377 0.935483871 0.516270888 0.69291339
## 107 0.08217788 0.221774194 0.258575198 0.22047244
## 108 0.30334799 0.006048387 0.201407212 0.27952756
## 109 0.04193439 0.008064516 0.222515391 0.14566929
## 110 0.86134596 0.542338710 0.045734389 0.71653543
## 111 0.76124450 0.165322581 0.494283201 0.46456693
## 112 0.81501522 0.766129032 0.201407212 0.79527559
## 113 0.59181603 0.310483871 0.018469657 0.49212598
## 114 0.70645925 0.415322581 0.091468777 0.56299213
## 115 0.26208996 0.943548387 0.300791557 0.51181102
## 116 0.25160636 0.705645161 0.460861917 0.43307087
## 117 0.46838011 0.288306452 0.222515391 0.41732283
## 118 0.25600271 0.016129032 0.127528584 0.30708661
## 119 0.42272574 0.743951613 0.693931398 0.56299213
## 120 0.06323977 0.322580645 0.193491645 0.19685039
## 121 0.47548191 0.540322581 0.403693931 0.54724409
## 122 0.06121069 0.437500000 0.440633245 0.21259843
## 123 0.75515725 0.048387097 0.134564644 0.39370079
## 124 0.41393304 0.697580645 0.106420405 0.53543307
## 125 0.77375719 0.651209677 0.649956025 0.71259843
## 126 0.29252621 0.237903226 0.225153914 0.35433071
## 127 0.02401082 0.784274194 0.442392260 0.19685039
## 128 0.26885357 0.000000000 0.078276165 0.28346457
## 129 0.74264457 0.987903226 0.025505717 0.90944882
## 130 0.19918837 0.241935484 0.376429200 0.31889764
## 131 0.00000000 0.798387097 0.073878628 0.00000000
## 132 0.89448766 0.058467742 0.375549692 0.43700787
## 133 0.02603991 0.548387097 0.015831135 0.16141732
## 134 0.74095367 0.675403226 0.394019349 0.70866142
## 135 0.12242137 0.778225806 0.574318382 0.36220472
## 136 0.16097396 0.947580645 0.072119613 0.39370079
## 137 0.08420697 0.786290323 0.079155673 0.31102362
## 138 0.92323301 0.582661290 0.522427441 0.75590551
## 139 0.14305039 0.522177419 0.177660510 0.31496063
## 140 0.62292864 0.885080645 0.012313105 0.75196850
## 141 0.24585729 0.342741935 0.110817942 0.36614173
## 142 0.65268854 0.713709677 0.662269129 0.69291339
## 143 0.74332093 0.669354839 0.330694811 0.72834646
## 144 0.35136963 0.114919355 0.299912049 0.34645669
## 145 0.32296246 0.298387097 0.339489886 0.38582677
## 146 0.47210010 0.038306452 0.076517150 0.34251969
## 147 0.80960433 0.147177419 0.073878628 0.45669291
## 148 0.82008793 0.987903226 0.386983289 0.93700787
## 149 0.12614136 0.812500000 0.102022867 0.36614173
## 150 0.14879946 0.520161290 0.178540018 0.33464567
## 151 0.94690565 0.280241935 0.322779244 0.57086614
## 152 0.40683125 0.169354839 0.425681618 0.39370079
## 153 0.66587758 0.469758065 0.122251539 0.59055118
## 154 0.57693608 0.800403226 0.328935796 0.68503937
## 155 0.63273588 0.425403226 0.080914688 0.55118110
## 156 0.01149814 0.233870968 0.047493404 0.06299213
## 157 0.31518431 0.877016129 0.441512753 0.53937008
## 158 0.50422726 0.026209677 0.211081794 0.33464567
## 159 0.03719986 0.743951613 0.394898857 0.22440945
## 160 0.44301657 0.370967742 0.301671064 0.44488189
## 161 0.58099425 0.364919355 0.267370273 0.50393701
## 162 0.28745350 0.721774194 0.430958663 0.46062992
## 163 0.63476496 0.364919355 0.222515391 0.52362205
## 164 0.55055800 0.741935484 0.062445031 0.64566929
## 165 0.39398039 0.296370968 0.044854881 0.40551181
## 166 0.79066622 0.068548387 0.743183817 0.40551181
## 167 0.05816706 0.758064516 0.187335092 0.25196850
## 168 0.69699019 0.104838710 0.167985928 0.41732283
## 169 0.72607372 0.475806452 0.503957784 0.61023622
## 170 0.95908015 0.213709677 0.053649956 0.52755906
## 171 0.16672303 0.233870968 0.159190853 0.26771654
## 172 0.55393980 0.421370968 0.414248021 0.50787402
## 173 0.06391613 0.405241935 0.146877748 0.23622047
## 174 0.56712885 0.143145161 0.109938434 0.39763780
## 175 0.74974636 0.068548387 0.112576957 0.38976378
## 176 0.93405479 0.985887097 0.364995602 1.00000000
## 177 0.83767332 0.608870968 0.175901495 0.73228346
## 178 0.57321610 0.157258065 0.306948109 0.39763780
## 179 0.93337842 0.046370968 0.205804749 0.40157480
## 180 0.55765979 0.201612903 0.152154793 0.43307087
## 181 0.52722354 0.052419355 0.070360598 0.35039370
## 182 0.73655732 0.108870968 0.238346526 0.41732283
## 183 0.18769023 0.114919355 0.258575198 0.27952756
## 184 0.97024011 0.866935484 0.628847845 0.96850394
## 185 0.85593507 0.429435484 0.261213720 0.62992126
## 186 0.69090294 0.909274194 0.169744943 0.82677165
## 187 0.46939466 0.042338710 0.231310466 0.34251969
## 188 0.64389584 0.578629032 0.157431838 0.61811024
## 189 0.96482922 0.280241935 0.029903254 0.56299213
## 190 0.06087251 0.243951613 0.203166227 0.20078740
## 191 0.13121407 0.828629032 0.048372911 0.36220472
## 192 0.25295908 0.217741935 0.050131926 0.32677165
## 193 0.05579980 0.082661290 0.275285840 0.16929134
## 194 0.56171796 0.846774194 0.029023747 0.70866142
## 195 0.50388908 0.717741935 0.050131926 0.61811024
## 196 0.12681772 0.074596774 0.118733509 0.23622047
## 197 0.31619885 0.098790323 0.068601583 0.31889764
## 198 0.59621238 0.187500000 0.053649956 0.44094488
## 199 0.95671288 0.846774194 0.579595427 0.94094488
## 200 0.78254988 0.173387097 0.073878628 0.46456693

Step 4. Correlation

library(corrplot)
## corrplot 0.95 loaded
# Pairwise Pearson correlation matrix of the standardised variables.
M <- cor(norm1, method = "pearson")
# Significance tests for every pair; the result is a list holding the
# p-values (`$p`) and the confidence bounds (`$lowCI`, `$uppCI`).
p.mat <- cor.mtest(norm1, conf.level = 0.95)
print(p.mat)
## $p
##                youtube     facebook    newspaper        sales
## youtube   0.000000e+00 4.408061e-01 4.256018e-01 1.467390e-42
## facebook  4.408061e-01 0.000000e+00 2.688835e-07 4.354966e-19
## newspaper 4.256018e-01 2.688835e-07 0.000000e+00 1.148196e-03
## sales     1.467390e-42 4.354966e-19 1.148196e-03 0.000000e+00
## 
## $lowCI
##               youtube    facebook   newspaper     sales
## youtube    1.00000000 -0.08457548 -0.08274345 0.7218201
## facebook  -0.08457548  1.00000000  0.22648989 0.4754954
## newspaper -0.08274345  0.22648989  1.00000000 0.0924875
## sales      0.72182010  0.47549537  0.09248750 1.0000000
## 
## $uppCI
##             youtube  facebook newspaper     sales
## youtube   1.0000000 0.1920890 0.1938652 0.8308014
## facebook  0.1920890 1.0000000 0.4697658 0.6620366
## newspaper 0.1938652 0.4697658 1.0000000 0.3557712
## sales     0.8308014 0.6620366 0.3557712 1.0000000
# Upper-triangle correlogram with variables ordered by hierarchical
# clustering; the p-value matrix and a 0.05 threshold are passed so that
# corrplot can mark the non-significant coefficients.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  p.mat = p.mat[["p"]],
  sig.level = 0.05
)

Step 5. Training and test sets

# Fix the RNG seed so the partition below is reproducible.
set.seed(123)
# Row indices for the training set: createDataPartition() is asked for a
# proportion of 0.8 of the rows, returned as a matrix rather than a list.
training.samples <- createDataPartition(
  norm1[["sales"]],
  p = 0.8,
  list = FALSE
)

# Selected rows form the training set; all remaining rows are held out.
train.data <- norm1[training.samples, ]
test.data <- norm1[-training.samples, ]

Step 6. Build a model

# Linear model of `sales` on every other column of the training data.
model <- lm(sales ~ ., data = train.data)

# Predictions for the held-out rows (on the same standardised scale as the
# data the model was fitted to).
predictions <- predict(model, newdata = test.data)

Step 7. Accuracy metrics

# Hold-out accuracy of `model` on the test set, collected in one row.
# Both `predictions` and `test.data$sales` are on the standardised scale
# produced in Step 3, so RMSE, MAE and MSE are in standardised units rather
# than in the original units of `sales`.
# RMSE(), R2() and MAE() take (pred, obs); mse() is called with the observed
# values first, matching ModelMetrics' (actual, predicted) argument order —
# the value is unchanged because squared error is symmetric.
data.frame(
  RMSE = RMSE(predictions, test.data$sales),
  R2 = R2(predictions, test.data$sales),
  MAE = MAE(predictions, test.data$sales),
  MSE = mse(test.data$sales, predictions)
)
##        RMSE        R2       MAE        MSE
## 1 0.3139314 0.9049049 0.2289764 0.09855291
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Variance inflation factors for the predictors of `model` (fitted earlier in
# the file); vif() is available once library(car) has been attached.
vif(model)
##   youtube  facebook newspaper 
##  1.004440  1.118155  1.115449

Part 2. Regression types

1. Linear Regression

# Per-column summary statistics (min, quartiles, mean, max) of the swiss data.
summary(swiss)
##    Fertility      Agriculture     Examination      Education    
##  Min.   :35.00   Min.   : 1.20   Min.   : 3.00   Min.   : 1.00  
##  1st Qu.:64.70   1st Qu.:35.90   1st Qu.:12.00   1st Qu.: 6.00  
##  Median :70.40   Median :54.10   Median :16.00   Median : 8.00  
##  Mean   :70.14   Mean   :50.66   Mean   :16.49   Mean   :10.98  
##  3rd Qu.:78.45   3rd Qu.:67.65   3rd Qu.:22.00   3rd Qu.:12.00  
##  Max.   :92.50   Max.   :89.70   Max.   :37.00   Max.   :53.00  
##     Catholic       Infant.Mortality
##  Min.   :  2.150   Min.   :10.80   
##  1st Qu.:  5.195   1st Qu.:18.15   
##  Median : 15.140   Median :20.00   
##  Mean   : 41.144   Mean   :19.94   
##  3rd Qu.: 93.125   3rd Qu.:21.70   
##  Max.   :100.000   Max.   :26.60
# Ordinary least squares: regress Fertility on every other column of swiss.
library(datasets)
model_swiss <- lm(Fertility ~ ., data = swiss)

# Keep the fitted coefficients as a named numeric vector.
lm_coeff <- coef(model_swiss)

print(summary(model_swiss))
## 
## Call:
## lm(formula = Fertility ~ ., data = swiss)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2743  -5.2617   0.5032   4.1198  15.3213 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      66.91518   10.70604   6.250 1.91e-07 ***
## Agriculture      -0.17211    0.07030  -2.448  0.01873 *  
## Examination      -0.25801    0.25388  -1.016  0.31546    
## Education        -0.87094    0.18303  -4.758 2.43e-05 ***
## Catholic          0.10412    0.03526   2.953  0.00519 ** 
## Infant.Mortality  1.07705    0.38172   2.822  0.00734 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared:  0.7067, Adjusted R-squared:  0.671 
## F-statistic: 19.76 on 5 and 41 DF,  p-value: 5.594e-10

Note: About 71% of the variation in Fertility (Multiple R-squared = 0.7067) is explained by the linear regression model.

2. Logistic regression

library(bestglm)
## Warning: package 'bestglm' was built under R version 4.5.1
## Loading required package: leaps
# Preview the first rows of SAheart; the columns used below are ldl (predictor)
# and chd (0/1 outcome).
head(SAheart)
##   sbp tobacco  ldl adiposity famhist typea obesity alcohol age chd
## 1 160   12.00 5.73     23.11 Present    49   25.30   97.20  52   1
## 2 144    0.01 4.41     28.61  Absent    55   28.87    2.06  63   1
## 3 118    0.08 3.48     32.28 Present    52   29.14    3.81  46   0
## 4 170    7.50 6.41     38.03 Present    51   31.99   24.26  58   1
## 5 134   13.60 3.50     27.78 Present    60   25.99   57.34  49   1
## 6 132    6.20 6.47     36.21 Present    62   30.77   14.14  45   0
# Logistic regression of the 0/1 CHD outcome (chd) on LDL cholesterol (ldl).
data("SAheart")
model_cholecterol <- glm(chd ~ ldl, family = binomial, data = SAheart)

# Observed outcomes against ldl; the 0/1 values are jittered slightly so that
# overlapping points remain visible.
plot(
  jitter(chd, factor = 0.1) ~ ldl,
  data = SAheart,
  pch = 20,
  xlab = "Low Density Lipoprotein Cholesterol",
  ylab = "Probability of CHD"
)
grid()

# Overlay the fitted probability of CHD as a dashed curve; curve() supplies `x`.
curve(
  predict(model_cholecterol, newdata = data.frame(ldl = x), type = "response"),
  add = TRUE,
  col = "dodgerblue",
  lty = 2
)

summary(model_cholecterol)
## 
## Call:
## glm(formula = chd ~ ldl, family = binomial, data = SAheart)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.96867    0.27308  -7.209 5.63e-13 ***
## ldl          0.27466    0.05164   5.319 1.04e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 596.11  on 461  degrees of freedom
## Residual deviance: 564.28  on 460  degrees of freedom
## AIC: 568.28
## 
## Number of Fisher Scoring iterations: 4

Note: The p-values for the intercept (5.63e-13) and for the ldl coefficient (1.04e-07) are both far below 0.05; the latter indicates that the relationship between LDL and CHD is highly significant. The fitted curve shows that the probability of CHD increases as LDL levels rise.

3. Ridge regression

library(glmnet)
## Warning: package 'glmnet' was built under R version 4.5.1
## Loading required package: Matrix
## Loaded glmnet 4.1-10
# Preview the first rows of swiss; Fertility is the first column.
head(swiss)
##              Fertility Agriculture Examination Education Catholic
## Courtelary        80.2        17.0          15        12     9.96
## Delemont          83.1        45.1           6         9    84.84
## Franches-Mnt      92.5        39.7           5         5    93.40
## Moutier           85.8        36.5          12         7    33.77
## Neuveville        76.9        43.5          17        15     5.16
## Porrentruy        76.1        35.3           9         7    90.57
##              Infant.Mortality
## Courtelary               22.2
## Delemont                 22.2
## Franches-Mnt             20.2
## Moutier                  20.3
## Neuveville               20.6
## Porrentruy               26.6

X contains the independent variables (all columns except Fertility); y contains the dependent variable (Fertility).

# Ridge regression: cross-validated glmnet fit with alpha = 0.
X <- swiss[, -1]  # every column except the first (Fertility)
y <- swiss[, 1]   # first column: Fertility

# Seed the RNG before cross-validation so the run is reproducible.
set.seed(123)
model_ridge <- cv.glmnet(
  as.matrix(X),
  y,
  alpha = 0,
  lambda = 10^seq(4, -1, -0.1)  # log-spaced grid from 1e4 down to 1e-1
)

# Extract the coefficients at the lambda stored as lambda.min.
best_lambda <- model_ridge$lambda.min
ridge_coeff <- predict(model_ridge, s = best_lambda, type = "coefficients")
ridge_coeff
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                   s=1.584893
## (Intercept)      62.97585936
## Agriculture      -0.09863022
## Examination      -0.33967990
## Education        -0.64733678
## Catholic          0.07703325
## Infant.Mortality  1.08821833

Note: Agriculture, examination, and education are negatively associated with fertility, whereas Catholicism and infant mortality are positively associated with fertility.

4. Lasso regression

# Lasso regression: cross-validated glmnet fit with alpha = 1, using the X and
# y objects created in the ridge section and the same seed and lambda grid.
# NOTE(review): this overwrites `model` (previously passed to vif()) and
# `best_lambda` (previously the ridge value).
set.seed(123)
model <- cv.glmnet(
  as.matrix(X),
  y,
  alpha = 1,
  lambda = 10^seq(4, -1, -0.1)  # log-spaced grid from 1e4 down to 1e-1
)

# Extract the coefficients at the lambda stored as lambda.min.
best_lambda <- model$lambda.min
lasso_coeff <- predict(model, s = best_lambda, type = "coefficients")
lasso_coeff
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                  s=0.1258925
## (Intercept)      65.46374579
## Agriculture      -0.14994107
## Examination      -0.24310141
## Education        -0.83632674
## Catholic          0.09913931
## Infant.Mortality  1.07238898

Note: Ridge regression and lasso regression are both regularization methods that help deal with multicollinearity.