Part 1.
Step 1. Load data
# Load the `marketing` data set from the datarium package into the workspace.
data("marketing", package = "datarium")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ModelMetrics)
##
## Attaching package: 'ModelMetrics'
## The following objects are masked from 'package:caret':
##
## confusionMatrix, precision, recall, sensitivity, specificity
## The following object is masked from 'package:base':
##
## kappa
Step 2. Inspect data
# Preview three randomly chosen rows of the data. slice_sample() is the
# maintained replacement for the superseded sample_n().
slice_sample(marketing, n = 3)
## youtube facebook newspaper sales
## 1 80.28 14.04 44.16 11.64
## 2 287.76 4.92 44.28 14.76
## 3 273.60 45.24 38.40 25.80
**Sales distribution**
# Histogram of sales (1-unit bins, drawn on the density scale) with a kernel
# density curve on top.
#  * after_stat(density) replaces the `..density..` notation that was
#    deprecated in ggplot2 3.4.0, so no deprecation warning is emitted.
#  * color = "red" is set outside aes(): inside aes() the string is treated as
#    a data value and mapped through the default colour scale, so the curve
#    would not actually be drawn in red.
p <- ggplot(marketing, aes(x = sales)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, fill = "grey", color = "black"
  ) +
  geom_density(color = "red")
p + theme_bw()
Step 3. Scaling techniques
# Standardise every column of `marketing` (centre, then scale) and print the
# transformed data.
# NOTE(review): the centring/scaling parameters are estimated on all rows,
# including the rows later held out as test data, and the response `sales` is
# standardised as well -- confirm this is intended.
preproc1 <- preProcess(x = marketing, method = c("center", "scale"))
norm1 <- predict(object = preproc1, newdata = marketing)
print(norm1)
## youtube facebook newspaper sales
## 1 0.96742460 0.979065591 1.774492530 1.548168135
## 2 -1.19437904 1.080097401 0.667902716 -0.694303815
## 3 -1.51235985 1.524637364 1.779084189 -0.905134512
## 4 0.05191939 1.214806480 1.283185019 0.858176766
## 5 0.39319551 -0.839506984 1.278593360 -0.215143142
## 6 -1.61136487 1.726700983 2.040808751 -1.307629477
## 7 -1.04295960 0.642292892 -0.323895625 -0.425973838
## 8 -0.31265202 -0.246787034 -0.870303044 -0.157643861
## 9 -1.61252963 -1.425491481 -1.357018896 -1.767623723
## 10 0.61450084 -1.391814211 -0.429503781 -0.655970962
## 11 -0.94278982 -1.176279684 -0.291754012 -1.039299500
## 12 0.78805080 0.049572941 -1.219269126 0.647346069
## 13 -1.43548537 0.797208333 1.622967784 -0.924300938
## 14 -0.57705364 -1.055041512 -1.072336039 -0.828468804
## 15 0.66458573 0.649028346 0.709227646 0.954008900
## 16 0.56325118 1.645875535 1.026052116 1.605667416
## 17 -0.92298882 0.898240143 3.831555755 -0.291808850
## 18 1.56494899 1.100303763 1.159210227 1.988995954
## 19 -0.90668211 -0.186167948 -0.562661892 -0.521805973
## 20 0.00299927 0.042837487 -0.525928620 0.110686115
## 21 0.83114711 0.298784739 1.049010411 0.762344631
## 22 1.05245243 -1.223427861 -0.323895625 -0.291808850
## 23 -1.55895045 -0.495998831 0.874527370 -1.614292308
## 24 0.94645883 -0.428644291 -0.199920832 0.283183958
## 25 -0.98705089 -0.718268813 -0.562661892 -0.828468804
## 26 1.34946748 -1.331195125 -0.507561984 -0.387640985
## 27 -0.04825039 0.406552002 -0.824386454 0.187351823
## 28 1.08390109 -0.442115199 -0.351445579 0.359849666
## 29 1.18523563 0.258372015 -0.351445579 0.934842473
## 30 -0.89037540 -0.489263377 0.470461379 -0.675137388
## 31 1.69889695 0.339197463 0.580661195 1.414003146
## 32 -0.39767985 -0.394967022 0.369444882 -0.406807411
## 33 -0.58054794 -1.465904205 -0.025437791 -0.847635231
## 34 1.38091613 -0.219845218 -1.389160509 0.647346069
## 35 -0.59801941 -1.472639659 -1.063152721 -0.866801658
## 36 1.67327212 -1.290782401 -1.012644472 -0.234309569
## 37 1.39605808 1.383192830 -1.173352536 2.180660223
## 38 -0.84262004 1.760378253 0.695452669 0.129852542
## 39 -1.21068574 0.231430199 0.208736817 -0.751803096
## 40 0.94296453 0.972330137 0.066395389 1.433169573
## 41 0.64594949 -0.064929776 0.048028753 0.494014654
## 42 0.34893444 0.682705616 0.374036541 0.589846789
## 43 1.70705030 0.298784739 -1.320285624 1.279838158
## 44 0.69719914 -1.001157880 -0.190737514 -0.215143142
## 45 -1.42034342 0.164075659 0.585252854 -1.058465927
## 46 0.32680391 -0.051458868 0.043437094 0.168185396
## 47 -0.66790531 -0.900126070 0.236286771 -0.655970962
## 48 1.08157156 1.228277388 -0.553478574 1.758998831
## 49 0.93364642 -0.502734285 0.888302347 0.149018969
## 50 -0.93347170 -0.778887899 0.286795020 -0.828468804
## 51 0.61450084 -1.358136941 0.185778522 -0.502639546
## 52 -0.54327546 -0.920332432 -1.237635762 -0.636804535
## 53 0.80785181 1.241748296 0.415361472 1.644000269
## 54 0.41416128 1.544843726 1.292368337 1.375670293
## 55 1.34713795 0.372874732 -0.672861707 1.184006023
## 56 0.60401795 1.760378253 1.352059904 1.854830966
## 57 -1.62767157 0.325726555 0.498011333 -1.633458735
## 58 -0.12628963 -0.273728850 -0.640720094 -0.157643861
## 59 0.74262497 1.773849161 0.328119951 1.873997393
## 60 0.74146021 0.420022910 -0.975911200 0.839010339
## 61 -1.08955020 -1.432226935 -0.420320463 -1.135131635
## 62 1.33083124 1.309102836 1.108701978 1.950663100
## 63 1.07458297 -0.522940647 -0.149412583 0.321516812
## 64 -0.51648587 0.426758364 -1.017236131 -0.004312446
## 65 -0.18569264 1.315838290 -0.075946040 0.762344631
## 66 -0.90901164 -0.940538794 -1.361610555 -0.905134512
## 67 -1.34579847 0.089985665 -1.301918988 -0.866801658
## 68 -0.09018192 -0.590295187 -0.934586269 -0.119311008
## 69 1.05245243 0.285313831 -0.897852997 0.934842473
## 70 0.81251087 1.389928284 -0.154004242 1.586500989
## 71 0.60634748 0.494112904 0.374036541 0.819843912
## 72 -0.43378756 -0.603766095 0.052620412 -0.310975277
## 73 -1.40054242 0.655763800 -0.516745302 -1.000966646
## 74 -0.20549365 -1.183015138 0.034253776 -0.579305254
## 75 0.77290886 0.089985665 -0.801428159 0.570680362
## 76 -1.51585415 1.376457376 2.702007645 -1.020133073
## 77 -1.39238907 -1.459168751 -0.452462076 -1.365128758
## 78 -0.30915772 0.352668371 -0.750919910 0.034020408
## 79 -1.64980211 0.446964726 -0.971319541 -1.671791589
## 80 -0.36157214 -1.048306058 -0.342262261 -0.579305254
## 81 -0.82281904 0.231430199 -0.378995532 -0.425973838
## 82 1.08040679 -1.290782401 0.291386679 -0.330141704
## 83 -0.83563145 -0.199638856 0.089353684 -0.521805973
## 84 -0.91600023 1.430341008 0.231695112 -0.080978154
## 85 0.77407363 1.329309198 0.149045251 1.471502427
## 86 0.53762635 -0.327612482 1.613784466 0.225684677
## 87 -0.82398380 0.285313831 -0.668270048 -0.387640985
## 88 -0.42330468 1.167658302 1.498992991 0.379016092
## 89 -0.68421201 0.150604751 1.967342208 -0.215143142
## 90 -0.43378756 1.652610989 0.957177231 0.513181081
## 91 -0.14842017 -1.236898769 -0.975911200 -0.540972400
## 92 -1.37957665 -1.465904205 0.112311979 -1.288463050
## 93 0.82299375 0.689441070 1.306143314 1.030674608
## 94 1.20969569 0.891504689 1.916833959 1.567334562
## 95 -0.46174192 -0.623972457 -0.902444656 -0.483473119
## 96 0.18936165 0.561467444 1.026052116 0.551513935
## 97 0.58887601 -1.331195125 -1.132027606 -0.445140265
## 98 0.44095087 -0.152490678 -0.392770509 0.283183958
## 99 1.66162447 1.282161020 0.947993914 2.180660223
## 100 -0.13793728 1.241748296 0.704635987 0.609013216
## 101 0.87773770 -1.277311493 0.883710688 -0.445140265
## 102 1.73966372 0.878033781 3.230048428 1.873997393
## 103 1.55097181 -0.886655162 -0.420320463 0.149018969
## 104 0.47589381 -0.408437930 -0.581028528 0.129852542
## 105 1.06177055 0.743324702 -1.159577559 1.279838158
## 106 -0.10648863 1.558314633 1.306143314 0.992341754
## 107 -1.42150819 -0.826036076 -0.039212768 -1.307629477
## 108 -0.65975195 -1.546729653 -0.337670602 -1.020133073
## 109 -1.56011521 -1.539994199 -0.227470786 -1.671791589
## 110 1.26211011 0.244901107 -1.150394241 1.107340316
## 111 0.91733971 -1.014628788 1.191351840 -0.119311008
## 112 1.10253732 0.992536499 -0.337670602 1.490668854
## 113 0.33379250 -0.529676101 -1.292735670 0.014853981
## 114 0.72864780 -0.179432494 -0.911627974 0.359849666
## 115 -0.80185327 1.585256449 0.181186863 0.110686115
## 116 -0.83796098 0.790472879 1.016868798 -0.272642423
## 117 -0.09134669 -0.603766095 -0.227470786 -0.349308131
## 118 -0.82281904 -1.513052383 -0.723369956 -0.885968085
## 119 -0.24858995 0.918446505 2.233658429 0.359849666
## 120 -1.48673502 -0.489263377 -0.378995532 -1.422628038
## 121 -0.06688662 0.238165653 0.718410964 0.283183958
## 122 -1.49372361 -0.105342500 0.911260642 -1.345962331
## 123 0.89637394 -1.405285119 -0.686636684 -0.464306692
## 124 -0.27887383 0.763531064 -0.833569772 0.225684677
## 125 0.96043601 0.608615622 2.004075480 1.088173889
## 126 -0.69702443 -0.772152445 -0.213695809 -0.655970962
## 127 -1.62184775 1.053155585 0.920443960 -1.422628038
## 128 -0.77855797 -1.566936015 -0.980502859 -1.000966646
## 129 0.85327764 1.733436437 -1.256002398 2.046495235
## 130 -1.01849954 -0.758681537 0.576069536 -0.828468804
## 131 -1.70454606 1.100303763 -1.003461154 -2.380949385
## 132 1.37625707 -1.371607849 0.571477877 -0.253475996
## 133 -1.61485916 0.265107469 -1.306510647 -1.595125881
## 134 0.84745381 0.689441070 0.667902716 1.069007462
## 135 -1.28290117 1.032949223 1.609192807 -0.617638108
## 136 -1.15011797 1.598727357 -1.012644472 -0.464306692
## 137 -1.41451960 1.059891039 -0.975911200 -0.866801658
## 138 1.47526209 0.379610186 1.338284927 1.299004585
## 139 -1.21185051 0.177546567 -0.461645394 -0.847635231
## 140 0.44095087 1.389928284 -1.324877283 1.279838158
## 141 -0.85776198 -0.421908837 -0.810611477 -0.598471681
## 142 0.54345018 0.817414695 2.068358705 0.992341754
## 143 0.85560717 0.669234708 0.337303269 1.164839596
## 144 -0.49435534 -1.183015138 0.176595204 -0.694303815
## 145 -0.59219559 -0.570088825 0.383219859 -0.502639546
## 146 -0.07853427 -1.438962389 -0.989686177 -0.713470242
## 147 1.08390109 -1.075247874 -1.003461154 -0.157643861
## 148 1.12000880 1.733436437 0.631169444 2.180660223
## 149 -1.27008875 1.147451941 -0.856528067 -0.598471681
## 150 -1.19204951 0.170811113 -0.457053735 -0.751803096
## 151 1.55679563 -0.630707911 0.295978338 0.398182519
## 152 -0.30333390 -1.001157880 0.833202439 -0.464306692
## 153 0.58887601 0.002424763 -0.750919910 0.494014654
## 154 0.28254284 1.107039217 0.328119951 0.954008900
## 155 0.47472905 -0.145755224 -0.966727882 0.302350385
## 156 -1.66494405 -0.785623353 -1.141210924 -2.074286554
## 157 -0.61898518 1.362986468 0.915852301 0.244851104
## 158 0.03211839 -1.479375113 -0.287162353 -0.751803096
## 159 -1.57642192 0.918446505 0.672494375 -1.288463050
## 160 -0.17870405 -0.327612482 0.185778522 -0.215143142
## 161 0.29652002 -0.347818844 0.006703822 0.072353262
## 162 -0.71449590 0.844356511 0.860752393 -0.138477435
## 163 0.48171764 -0.347818844 -0.227470786 0.168185396
## 164 0.19169118 0.911711051 -1.063152721 0.762344631
## 165 -0.34759496 -0.576824279 -1.154985900 -0.406807411
## 166 1.01867425 -1.337930579 2.490791332 -0.406807411
## 167 -1.50420650 0.965594683 -0.411137145 -1.154298062
## 168 0.69603438 -1.216692407 -0.512153643 -0.349308131
## 169 0.79620416 0.022631125 1.241860088 0.589846789
## 170 1.59872717 -0.852977892 -1.109069311 0.187351823
## 171 -1.13031697 -0.785623353 -0.558070233 -1.077632354
## 172 0.20333883 -0.159226132 0.773510872 0.091519689
## 173 -1.48440549 -0.213109764 -0.622353458 -1.230963769
## 174 0.24876466 -1.088718782 -0.815203136 -0.445140265
## 175 0.87773770 -1.337930579 -0.801428159 -0.483473119
## 176 1.51253457 1.726700983 0.516377969 2.487323054
## 177 1.18057657 0.467171088 -0.470828712 1.184006023
## 178 0.26973043 -1.041570604 0.213328476 -0.445140265
## 179 1.51020504 -1.412020573 -0.314712307 -0.425973838
## 180 0.21615124 -0.893390616 -0.594803505 -0.272642423
## 181 0.11132240 -1.391814211 -1.021827790 -0.675137388
## 182 0.83231187 -1.203221500 -0.144820924 -0.349308131
## 183 -1.05810154 -1.183015138 -0.039212768 -1.020133073
## 184 1.63716441 1.329309198 1.893875664 2.333991639
## 185 1.24347388 -0.132284316 -0.025437791 0.685678923
## 186 0.67506861 1.470753732 -0.502970325 1.644000269
## 187 -0.08785239 -1.425491481 -0.181554196 -0.713470242
## 188 0.51316629 0.366139279 -0.567253551 0.628179642
## 189 1.61852817 -0.630707911 -1.233044103 0.359849666
## 190 -1.49488838 -0.751946083 -0.328487284 -1.403461612
## 191 -1.25261728 1.201335572 -1.136619265 -0.617638108
## 192 -0.83330192 -0.839506984 -1.127435947 -0.790135950
## 193 -1.51235985 -1.290782401 0.048028753 -1.556793027
## 194 0.23012842 1.261954658 -1.237635762 1.069007462
## 195 0.03095363 0.830885603 -1.127435947 0.628179642
## 196 -1.26775922 -1.317724217 -0.769286546 -1.230963769
## 197 -0.61549089 -1.236898769 -1.031011108 -0.828468804
## 198 0.34893444 -0.940538794 -1.109069311 -0.234309569
## 199 1.59057381 1.261954658 1.636742761 2.199826650
## 200 0.99071990 -0.987686972 -1.003461154 -0.119311008
# Range ("min-max") normalisation: rescale every column of `marketing` to the
# interval [0, 1] and print the result.
preproc2 <- preProcess(x = marketing, method = "range")
norm2 <- predict(object = preproc2, newdata = marketing)
print(norm2)
## youtube facebook newspaper sales
## 1 0.77578627 0.762096774 0.605980651 0.80708661
## 2 0.14812310 0.792338710 0.394019349 0.34645669
## 3 0.05579980 0.925403226 0.606860158 0.30314961
## 4 0.50997633 0.832661290 0.511873351 0.66535433
## 5 0.60906324 0.217741935 0.510993843 0.44488189
## 6 0.02705445 0.985887097 0.656992084 0.22047244
## 7 0.19208657 0.661290323 0.204045734 0.40157480
## 8 0.40412580 0.395161290 0.099384345 0.45669291
## 9 0.02671627 0.042338710 0.006156552 0.12598425
## 10 0.67331755 0.052419355 0.183817062 0.35433071
## 11 0.22117010 0.116935484 0.210202287 0.27559055
## 12 0.72370646 0.483870968 0.032541777 0.62204724
## 13 0.07811972 0.707661290 0.576956904 0.29921260
## 14 0.32735881 0.153225806 0.060686016 0.31889764
## 15 0.68785932 0.663306452 0.401934916 0.68503937
## 16 0.65843761 0.961693548 0.462620932 0.81889764
## 17 0.22691917 0.737903226 1.000000000 0.42913386
## 18 0.94927291 0.798387097 0.488126649 0.89763780
## 19 0.23165370 0.413306452 0.158311346 0.38188976
## 20 0.49577274 0.481854839 0.165347405 0.51181102
## 21 0.73621914 0.558467742 0.467018470 0.64566929
## 22 0.80047345 0.102822581 0.204045734 0.42913386
## 23 0.04227257 0.320564516 0.433597186 0.15748031
## 24 0.76969902 0.340725806 0.227792436 0.54724409
## 25 0.20831924 0.254032258 0.158311346 0.31889764
## 26 0.88670950 0.070564516 0.168865435 0.40944882
## 27 0.48089280 0.590725806 0.108179420 0.52755906
## 28 0.80960433 0.336693548 0.198768690 0.56299213
## 29 0.83902604 0.546370968 0.198768690 0.68110236
## 30 0.23638823 0.322580645 0.356200528 0.35039370
## 31 0.98816368 0.570564516 0.377308707 0.77952756
## 32 0.37943862 0.350806452 0.336851363 0.40551181
## 33 0.32634427 0.030241935 0.261213720 0.31496063
## 34 0.89584038 0.403225806 0.000000000 0.62204724
## 35 0.32127156 0.028225806 0.062445031 0.31102362
## 36 0.98072371 0.082661290 0.072119613 0.44094488
## 37 0.90023673 0.883064516 0.041336851 0.93700787
## 38 0.25025364 0.995967742 0.399296394 0.51574803
## 39 0.14338857 0.538306452 0.306068602 0.33464567
## 40 0.76868448 0.760080645 0.278803870 0.78346457
## 41 0.68244843 0.449596774 0.275285840 0.59055118
## 42 0.59621238 0.673387097 0.337730871 0.61023622
## 43 0.99053094 0.558467742 0.013192612 0.75196850
## 44 0.69732837 0.169354839 0.229551451 0.44488189
## 45 0.08251606 0.518145161 0.378188215 0.27165354
## 46 0.58978695 0.453629032 0.274406332 0.52362205
## 47 0.30098072 0.199596774 0.311345646 0.35433071
## 48 0.80892797 0.836693548 0.160070361 0.85039370
## 49 0.76597903 0.318548387 0.436235708 0.51968504
## 50 0.22387555 0.235887097 0.321020229 0.31889764
## 51 0.67331755 0.062500000 0.301671064 0.38582677
## 52 0.33716605 0.193548387 0.029023747 0.35826772
## 53 0.72945553 0.840725806 0.345646438 0.82677165
## 54 0.61515049 0.931451613 0.513632366 0.77165354
## 55 0.88603314 0.580645161 0.137203166 0.73228346
## 56 0.67027393 0.995967742 0.525065963 0.87007874
## 57 0.02231992 0.566532258 0.361477573 0.15354331
## 58 0.45823470 0.387096774 0.143359719 0.45669291
## 59 0.71051742 1.000000000 0.328935796 0.87401575
## 60 0.71017924 0.594758065 0.079155673 0.66141732
## 61 0.17855935 0.040322581 0.185576077 0.25590551
## 62 0.88129861 0.860887097 0.478452067 0.88976378
## 63 0.80689888 0.312500000 0.237467018 0.55511811
## 64 0.34494420 0.596774194 0.071240106 0.48818898
## 65 0.44098749 0.862903226 0.251539138 0.64566929
## 66 0.23097734 0.187500000 0.005277045 0.30314961
## 67 0.10415962 0.495967742 0.016710642 0.31102362
## 68 0.46871830 0.292338710 0.087071240 0.46456693
## 69 0.80047345 0.554435484 0.094107300 0.68110236
## 70 0.73080825 0.885080645 0.236587511 0.81496063
## 71 0.67095029 0.616935484 0.337730871 0.65748031
## 72 0.36895502 0.288306452 0.276165347 0.42519685
## 73 0.08826513 0.665322581 0.167106420 0.28346457
## 74 0.43523842 0.114919355 0.272647318 0.37007874
## 75 0.71931011 0.495967742 0.112576957 0.60629921
## 76 0.05478526 0.881048387 0.783641161 0.27952756
## 77 0.09063240 0.032258065 0.179419525 0.20866142
## 78 0.40514034 0.574596774 0.122251539 0.49606299
## 79 0.01589449 0.602822581 0.080035180 0.14566929
## 80 0.38992222 0.155241935 0.200527704 0.37007874
## 81 0.25600271 0.538306452 0.193491645 0.40157480
## 82 0.80858979 0.082661290 0.321899736 0.42125984
## 83 0.25228272 0.409274194 0.283201407 0.38188976
## 84 0.22894826 0.897177419 0.310466139 0.47244094
## 85 0.71964829 0.866935484 0.294635004 0.79133858
## 86 0.65099763 0.370967742 0.575197889 0.53543307
## 87 0.25566452 0.554435484 0.138082674 0.40944882
## 88 0.37199865 0.818548387 0.553210202 0.56692913
## 89 0.29624620 0.514112903 0.642919965 0.44488189
## 90 0.36895502 0.963709677 0.449428320 0.59448819
## 91 0.45180927 0.098790323 0.079155673 0.37795276
## 92 0.09435238 0.030241935 0.287598945 0.22440945
## 93 0.73385188 0.675403226 0.516270888 0.70078740
## 94 0.84612783 0.735887097 0.633245383 0.81102362
## 95 0.36083869 0.282258065 0.093227792 0.38976378
## 96 0.54988164 0.637096774 0.462620932 0.60236220
## 97 0.66587758 0.070564516 0.049252419 0.39763780
## 98 0.62292864 0.423387097 0.190853122 0.54724409
## 99 0.97734190 0.852822581 0.447669305 0.93700787
## 100 0.45485289 0.840725806 0.401055409 0.61417323
## 101 0.74974636 0.086693548 0.435356201 0.39763780
## 102 1.00000000 0.731854839 0.884784521 0.87401575
## 103 0.94521474 0.203629032 0.185576077 0.51968504
## 104 0.63307406 0.346774194 0.154793316 0.51574803
## 105 0.80317890 0.691532258 0.043975374 0.75196850
## 106 0.46398377 0.935483871 0.516270888 0.69291339
## 107 0.08217788 0.221774194 0.258575198 0.22047244
## 108 0.30334799 0.006048387 0.201407212 0.27952756
## 109 0.04193439 0.008064516 0.222515391 0.14566929
## 110 0.86134596 0.542338710 0.045734389 0.71653543
## 111 0.76124450 0.165322581 0.494283201 0.46456693
## 112 0.81501522 0.766129032 0.201407212 0.79527559
## 113 0.59181603 0.310483871 0.018469657 0.49212598
## 114 0.70645925 0.415322581 0.091468777 0.56299213
## 115 0.26208996 0.943548387 0.300791557 0.51181102
## 116 0.25160636 0.705645161 0.460861917 0.43307087
## 117 0.46838011 0.288306452 0.222515391 0.41732283
## 118 0.25600271 0.016129032 0.127528584 0.30708661
## 119 0.42272574 0.743951613 0.693931398 0.56299213
## 120 0.06323977 0.322580645 0.193491645 0.19685039
## 121 0.47548191 0.540322581 0.403693931 0.54724409
## 122 0.06121069 0.437500000 0.440633245 0.21259843
## 123 0.75515725 0.048387097 0.134564644 0.39370079
## 124 0.41393304 0.697580645 0.106420405 0.53543307
## 125 0.77375719 0.651209677 0.649956025 0.71259843
## 126 0.29252621 0.237903226 0.225153914 0.35433071
## 127 0.02401082 0.784274194 0.442392260 0.19685039
## 128 0.26885357 0.000000000 0.078276165 0.28346457
## 129 0.74264457 0.987903226 0.025505717 0.90944882
## 130 0.19918837 0.241935484 0.376429200 0.31889764
## 131 0.00000000 0.798387097 0.073878628 0.00000000
## 132 0.89448766 0.058467742 0.375549692 0.43700787
## 133 0.02603991 0.548387097 0.015831135 0.16141732
## 134 0.74095367 0.675403226 0.394019349 0.70866142
## 135 0.12242137 0.778225806 0.574318382 0.36220472
## 136 0.16097396 0.947580645 0.072119613 0.39370079
## 137 0.08420697 0.786290323 0.079155673 0.31102362
## 138 0.92323301 0.582661290 0.522427441 0.75590551
## 139 0.14305039 0.522177419 0.177660510 0.31496063
## 140 0.62292864 0.885080645 0.012313105 0.75196850
## 141 0.24585729 0.342741935 0.110817942 0.36614173
## 142 0.65268854 0.713709677 0.662269129 0.69291339
## 143 0.74332093 0.669354839 0.330694811 0.72834646
## 144 0.35136963 0.114919355 0.299912049 0.34645669
## 145 0.32296246 0.298387097 0.339489886 0.38582677
## 146 0.47210010 0.038306452 0.076517150 0.34251969
## 147 0.80960433 0.147177419 0.073878628 0.45669291
## 148 0.82008793 0.987903226 0.386983289 0.93700787
## 149 0.12614136 0.812500000 0.102022867 0.36614173
## 150 0.14879946 0.520161290 0.178540018 0.33464567
## 151 0.94690565 0.280241935 0.322779244 0.57086614
## 152 0.40683125 0.169354839 0.425681618 0.39370079
## 153 0.66587758 0.469758065 0.122251539 0.59055118
## 154 0.57693608 0.800403226 0.328935796 0.68503937
## 155 0.63273588 0.425403226 0.080914688 0.55118110
## 156 0.01149814 0.233870968 0.047493404 0.06299213
## 157 0.31518431 0.877016129 0.441512753 0.53937008
## 158 0.50422726 0.026209677 0.211081794 0.33464567
## 159 0.03719986 0.743951613 0.394898857 0.22440945
## 160 0.44301657 0.370967742 0.301671064 0.44488189
## 161 0.58099425 0.364919355 0.267370273 0.50393701
## 162 0.28745350 0.721774194 0.430958663 0.46062992
## 163 0.63476496 0.364919355 0.222515391 0.52362205
## 164 0.55055800 0.741935484 0.062445031 0.64566929
## 165 0.39398039 0.296370968 0.044854881 0.40551181
## 166 0.79066622 0.068548387 0.743183817 0.40551181
## 167 0.05816706 0.758064516 0.187335092 0.25196850
## 168 0.69699019 0.104838710 0.167985928 0.41732283
## 169 0.72607372 0.475806452 0.503957784 0.61023622
## 170 0.95908015 0.213709677 0.053649956 0.52755906
## 171 0.16672303 0.233870968 0.159190853 0.26771654
## 172 0.55393980 0.421370968 0.414248021 0.50787402
## 173 0.06391613 0.405241935 0.146877748 0.23622047
## 174 0.56712885 0.143145161 0.109938434 0.39763780
## 175 0.74974636 0.068548387 0.112576957 0.38976378
## 176 0.93405479 0.985887097 0.364995602 1.00000000
## 177 0.83767332 0.608870968 0.175901495 0.73228346
## 178 0.57321610 0.157258065 0.306948109 0.39763780
## 179 0.93337842 0.046370968 0.205804749 0.40157480
## 180 0.55765979 0.201612903 0.152154793 0.43307087
## 181 0.52722354 0.052419355 0.070360598 0.35039370
## 182 0.73655732 0.108870968 0.238346526 0.41732283
## 183 0.18769023 0.114919355 0.258575198 0.27952756
## 184 0.97024011 0.866935484 0.628847845 0.96850394
## 185 0.85593507 0.429435484 0.261213720 0.62992126
## 186 0.69090294 0.909274194 0.169744943 0.82677165
## 187 0.46939466 0.042338710 0.231310466 0.34251969
## 188 0.64389584 0.578629032 0.157431838 0.61811024
## 189 0.96482922 0.280241935 0.029903254 0.56299213
## 190 0.06087251 0.243951613 0.203166227 0.20078740
## 191 0.13121407 0.828629032 0.048372911 0.36220472
## 192 0.25295908 0.217741935 0.050131926 0.32677165
## 193 0.05579980 0.082661290 0.275285840 0.16929134
## 194 0.56171796 0.846774194 0.029023747 0.70866142
## 195 0.50388908 0.717741935 0.050131926 0.61811024
## 196 0.12681772 0.074596774 0.118733509 0.23622047
## 197 0.31619885 0.098790323 0.068601583 0.31889764
## 198 0.59621238 0.187500000 0.053649956 0.44094488
## 199 0.95671288 0.846774194 0.579595427 0.94094488
## 200 0.78254988 0.173387097 0.073878628 0.46456693
Step 4. Correlation
library(corrplot)
## corrplot 0.95 loaded
# Pairwise correlations of the standardised variables, plus the matching
# significance tests (p-values with lower/upper confidence bounds).
M <- cor(x = norm1)
p.mat <- cor.mtest(mat = norm1)
print(p.mat)
## $p
## youtube facebook newspaper sales
## youtube 0.000000e+00 4.408061e-01 4.256018e-01 1.467390e-42
## facebook 4.408061e-01 0.000000e+00 2.688835e-07 4.354966e-19
## newspaper 4.256018e-01 2.688835e-07 0.000000e+00 1.148196e-03
## sales 1.467390e-42 4.354966e-19 1.148196e-03 0.000000e+00
##
## $lowCI
## youtube facebook newspaper sales
## youtube 1.00000000 -0.08457548 -0.08274345 0.7218201
## facebook -0.08457548 1.00000000 0.22648989 0.4754954
## newspaper -0.08274345 0.22648989 1.00000000 0.0924875
## sales 0.72182010 0.47549537 0.09248750 1.0000000
##
## $uppCI
## youtube facebook newspaper sales
## youtube 1.0000000 0.1920890 0.1938652 0.8308014
## facebook 0.1920890 1.0000000 0.4697658 0.6620366
## newspaper 0.1938652 0.4697658 1.0000000 0.3557712
## sales 0.8308014 0.6620366 0.3557712 1.0000000
# Upper-triangle correlogram with variables ordered by hierarchical
# clustering; correlations whose p-value exceeds 0.05 are marked as
# insignificant.
corrplot(
  corr = M,
  type = "upper",
  order = "hclust",
  p.mat = p.mat[["p"]],
  sig.level = 0.05
)
Step 5. Training and test sets
# Reproducible 80/20 train/test split of the standardised data, partitioned
# on the outcome `sales`.
set.seed(123)
training.samples <- createDataPartition(
  y = norm1[["sales"]],
  p = 0.8,
  list = FALSE
)
train.data <- norm1[training.samples, ]
test.data <- norm1[-training.samples, ]
Step 6. Build a model
# Fit a linear regression of sales on all remaining columns using the training
# rows, then predict sales for the held-out test rows.
model <- lm(formula = sales ~ ., data = train.data)
predictions <- predict(object = model, newdata = test.data)
Step 7. Accuracy metrics
# Hold-out performance of the linear model on the test rows.
# ModelMetrics is attached after caret and masks several of its functions, so
# each metric is called with an explicit package prefix: RMSE(), R2() and MAE()
# take (pred, obs) and come from caret; mse() takes (actual, predicted) and
# comes from ModelMetrics.
data.frame(
  RMSE = caret::RMSE(pred = predictions, obs = test.data$sales),
  R2 = caret::R2(pred = predictions, obs = test.data$sales),
  MAE = caret::MAE(pred = predictions, obs = test.data$sales),
  MSE = ModelMetrics::mse(test.data$sales, predictions)
)
## RMSE R2 MAE MSE
## 1 0.3139314 0.9049049 0.2289764 0.09855291
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Variance inflation factor of each predictor in the fitted linear model, as a
# multicollinearity check.
car::vif(mod = model)
## youtube facebook newspaper
## 1.004440 1.118155 1.115449
Part 2. Regression types
# Per-column summary statistics (min, quartiles, mean, max) of the built-in
# swiss data set.
summary(object = swiss)
## Fertility Agriculture Examination Education
## Min. :35.00 Min. : 1.20 Min. : 3.00 Min. : 1.00
## 1st Qu.:64.70 1st Qu.:35.90 1st Qu.:12.00 1st Qu.: 6.00
## Median :70.40 Median :54.10 Median :16.00 Median : 8.00
## Mean :70.14 Mean :50.66 Mean :16.49 Mean :10.98
## 3rd Qu.:78.45 3rd Qu.:67.65 3rd Qu.:22.00 3rd Qu.:12.00
## Max. :92.50 Max. :89.70 Max. :37.00 Max. :53.00
## Catholic Infant.Mortality
## Min. : 2.150 Min. :10.80
## 1st Qu.: 5.195 1st Qu.:18.15
## Median : 15.140 Median :20.00
## Mean : 41.144 Mean :19.94
## 3rd Qu.: 93.125 3rd Qu.:21.70
## Max. :100.000 Max. :26.60
library(datasets)
# Ordinary least squares fit of Fertility on every other column of swiss;
# keep the coefficient vector and print the full model summary.
model_swiss <- lm(formula = Fertility ~ ., data = swiss)
lm_coeff <- coef(model_swiss)
print(summary(object = model_swiss))
##
## Call:
## lm(formula = Fertility ~ ., data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2743 -5.2617 0.5032 4.1198 15.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.91518 10.70604 6.250 1.91e-07 ***
## Agriculture -0.17211 0.07030 -2.448 0.01873 *
## Examination -0.25801 0.25388 -1.016 0.31546
## Education -0.87094 0.18303 -4.758 2.43e-05 ***
## Catholic 0.10412 0.03526 2.953 0.00519 **
## Infant.Mortality 1.07705 0.38172 2.822 0.00734 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.671
## F-statistic: 19.76 on 5 and 41 DF, p-value: 5.594e-10
Note: About 71% of the variation in Fertility (multiple R-squared = 0.7067; adjusted R-squared = 0.671) is explained by this linear regression model.
library(bestglm)
## Warning: package 'bestglm' was built under R version 4.5.1
## Loading required package: leaps
# Show the first six rows of the SAheart data.
head(x = SAheart, n = 6L)
## sbp tobacco ldl adiposity famhist typea obesity alcohol age chd
## 1 160 12.00 5.73 23.11 Present 49 25.30 97.20 52 1
## 2 144 0.01 4.41 28.61 Absent 55 28.87 2.06 63 1
## 3 118 0.08 3.48 32.28 Present 52 29.14 3.81 46 0
## 4 170 7.50 6.41 38.03 Present 51 31.99 24.26 58 1
## 5 134 13.60 3.50 27.78 Present 60 25.99 57.34 49 1
## 6 132 6.20 6.47 36.21 Present 62 30.77 14.14 45 0
# Logistic regression of coronary heart disease status (chd) on LDL
# cholesterol. The scatter plot shows the slightly jittered 0/1 outcomes, the
# dashed curve is the fitted probability of chd as a function of ldl.
data("SAheart")
model_cholecterol <- glm(
  formula = chd ~ ldl,
  family = binomial,
  data = SAheart
)
plot(
  jitter(chd, factor = 0.1) ~ ldl,
  data = SAheart,
  pch = 20,
  xlab = "Low Density Lipoprotein Cholesterol",
  ylab = "Probability of CHD"
)
grid()
curve(
  predict(model_cholecterol, newdata = data.frame(ldl = x), type = "response"),
  add = TRUE,
  lty = 2,
  col = "dodgerblue"
)
summary(model_cholecterol)
##
## Call:
## glm(formula = chd ~ ldl, family = binomial, data = SAheart)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.96867 0.27308 -7.209 5.63e-13 ***
## ldl 0.27466 0.05164 5.319 1.04e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 596.11 on 461 degrees of freedom
## Residual deviance: 564.28 on 460 degrees of freedom
## AIC: 568.28
##
## Number of Fisher Scoring iterations: 4
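Note: The p-value of the ldl coefficient (1.04e-07) indicates that the relationship between LDL and CHD is highly significant; the intercept's p-value (5.63e-13) only tests whether the baseline log-odds differ from zero. The positive ldl coefficient (0.27466) is reflected in the curve, which shows that the probability of CHD increases as LDL levels rise.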
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.5.1
## Loading required package: Matrix
## Loaded glmnet 4.1-10
# Show the first six rows of the swiss data.
head(x = swiss, n = 6L)
## Fertility Agriculture Examination Education Catholic
## Courtelary 80.2 17.0 15 12 9.96
## Delemont 83.1 45.1 6 9 84.84
## Franches-Mnt 92.5 39.7 5 5 93.40
## Moutier 85.8 36.5 12 7 33.77
## Neuveville 76.9 43.5 17 15 5.16
## Porrentruy 76.1 35.3 9 7 90.57
## Infant.Mortality
## Courtelary 22.2
## Delemont 22.2
## Franches-Mnt 20.2
## Moutier 20.3
## Neuveville 20.6
## Porrentruy 26.6
# Ridge regression (alpha = 0) of Fertility (first column of swiss) on the
# remaining columns. Cross-validation chooses the penalty from a decreasing
# grid running from 10^4 down to 10^-1; the coefficients at the
# error-minimising lambda are extracted and printed.
X <- swiss[, -1]
y <- swiss[, 1]
set.seed(123)
model_ridge <- cv.glmnet(
  x = as.matrix(X),
  y = y,
  alpha = 0,
  lambda = 10^seq(from = 4, to = -1, by = -0.1)
)
best_lambda <- model_ridge[["lambda.min"]]
ridge_coeff <- predict(model_ridge, s = best_lambda, type = "coefficients")
ridge_coeff
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s=1.584893
## (Intercept) 62.97585936
## Agriculture -0.09863022
## Examination -0.33967990
## Education -0.64733678
## Catholic 0.07703325
## Infant.Mortality 1.08821833
Note: Agriculture, Examination, and Education are negatively associated with fertility, whereas Catholic and Infant.Mortality are positively associated with it.
# Lasso regression (alpha = 1) on the same predictors, response and lambda
# grid as the ridge fit. The fit is stored as `model_lasso` (parallel to
# `model_ridge`) so that it does not overwrite `model`, the linear model
# fitted on the marketing data earlier in this document.
set.seed(123)
model_lasso <- cv.glmnet(
  x = as.matrix(X),
  y = y,
  alpha = 1,
  lambda = 10^seq(from = 4, to = -1, by = -0.1)
)
best_lambda <- model_lasso$lambda.min
lasso_coeff <- predict(model_lasso, s = best_lambda, type = "coefficients")
lasso_coeff
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s=0.1258925
## (Intercept) 65.46374579
## Agriculture -0.14994107
## Examination -0.24310141
## Education -0.83632674
## Catholic 0.09913931
## Infant.Mortality 1.07238898
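Note: Both ridge regression and lasso regression are regularization methods designed to deal with multicollinearity: each shrinks the coefficients toward zero, and lasso can additionally set some coefficients exactly to zero.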