#(a). Load the mtcars data and generate its descriptive statistics as shown below. The following packages need to be installed: psych, tidyverse, tidymodels, vip, ISLR2.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
#install.packages("psych")
#install.packages("tidyverse")
#install.packages("tidymodels")
#install.packages("vip")
#install.packages("ISLR2")
library(psych)
describe(mtcars)
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 2 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 3 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 4 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 5 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 6 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 7 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 8 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 9 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 10 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 11 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
# describe() columns: item name, item number (vars), n (valid cases), mean, sd,
# median, trimmed mean, mad, min, max, range, skew, kurtosis, se
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
#(b). To assess the relationship between mpg and the other predictors, we check the relationships visually. Based on the plots, which predictors might benefit from transformations such as log(x), sqrt(x), or x^2? Comment on your findings.
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
cyl_mpg <- ggplot(mtcars, aes(x = cyl, y = mpg)) +
geom_point() +
geom_smooth(method=lm, se=FALSE)
plot(cyl_mpg)
## `geom_smooth()` using formula = 'y ~ x'
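# A sketch to view mpg against every predictor at once: stack the predictors
# with tidyr::pivot_longer() and facet the scatter plots (assumes the tidyverse
# is installed, as listed in (a); "predictor" and "value" are just column names
# chosen here).
library(tidyverse)
mtcars %>%
  pivot_longer(-mpg, names_to = "predictor", values_to = "value") %>%
  ggplot(aes(x = value, y = mpg)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~ predictor, scales = "free_x")
# In these plots, disp, hp, and wt appear to have nonlinear, roughly hyperbolic
# relationships with mpg, so transformations such as log(x) or sqrt(x) are
# plausible candidates for them; the remaining predictors look closer to linear.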
#(c). Run the multiple regression of mpg on all predictors and show the estimated results.
#library() loads one package per call
library(ggplot2)
library(dplyr)
#Name the model mpg_fit rather than mpg to avoid shadowing the mpg column
mpg_fit <- lm(mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb, data = mtcars)
mpg_fit
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear + carb, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 12.30337 -0.11144 0.01334 -0.02148 0.78711 -3.71530
## qsec vs am gear carb
## 0.82104 0.31776 2.52023 0.65541 -0.19942
summary(mpg_fit)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
plot(mpg_fit)
#(d). Use car::vif() (variance inflation factor) to check for multicollinearity among the predictors. Find the predictors with VIF higher than 10.
#install.packages('car')
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
vif(mpg_fit)
## cyl disp hp drat wt qsec vs am
## 15.373833 21.620241 9.832037 3.374620 15.164887 7.527958 4.965873 4.648487
## gear carb
## 5.357452 7.908747
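# From the output above, cyl, disp, and wt have VIF greater than 10. A small
# sketch to filter them programmatically:
vifs <- vif(mpg_fit)
vifs[vifs > 10]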
#(e). Rerun the multiple regression (1) excluding disp, and (2) excluding both disp and cyl from the predictors. Are there any improvements in the regression results?
mpg1 <- lm(mpg ~ cyl + hp + drat + wt + qsec + vs + am + gear + carb, data=mtcars)
summary(mpg1)
##
## Call:
## lm(formula = mpg ~ cyl + hp + drat + wt + qsec + vs + am + gear +
## carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7863 -1.4055 -0.2635 1.2029 4.4753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.55052 18.52585 0.677 0.5052
## cyl 0.09627 0.99715 0.097 0.9240
## hp -0.01295 0.01834 -0.706 0.4876
## drat 0.92864 1.60794 0.578 0.5694
## wt -2.62694 1.19800 -2.193 0.0392 *
## qsec 0.66523 0.69335 0.959 0.3478
## vs 0.16035 2.07277 0.077 0.9390
## am 2.47882 2.03513 1.218 0.2361
## gear 0.74300 1.47360 0.504 0.6191
## carb -0.61686 0.60566 -1.018 0.3195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.623 on 22 degrees of freedom
## Multiple R-squared: 0.8655, Adjusted R-squared: 0.8105
## F-statistic: 15.73 on 9 and 22 DF, p-value: 1.183e-07
vif(mpg1)
## cyl hp drat wt qsec vs am gear
## 14.284737 7.123361 3.329298 6.189050 6.914423 4.916053 4.645108 5.324402
## carb
## 4.310597
mpg2 <- lm(mpg ~ hp + drat + wt + qsec + vs + am + gear + carb, data=mtcars)
summary(mpg2)
##
## Call:
## lm(formula = mpg ~ hp + drat + wt + qsec + vs + am + gear + carb,
## data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8187 -1.3903 -0.3045 1.2269 4.5183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.80810 12.88582 1.072 0.2950
## hp -0.01225 0.01649 -0.743 0.4650
## drat 0.88894 1.52061 0.585 0.5645
## wt -2.60968 1.15878 -2.252 0.0342 *
## qsec 0.63983 0.62752 1.020 0.3185
## vs 0.08786 1.88992 0.046 0.9633
## am 2.42418 1.91227 1.268 0.2176
## gear 0.69390 1.35294 0.513 0.6129
## carb -0.61286 0.59109 -1.037 0.3106
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.566 on 23 degrees of freedom
## Multiple R-squared: 0.8655, Adjusted R-squared: 0.8187
## F-statistic: 18.5 on 8 and 23 DF, p-value: 2.627e-08
vif(mpg2)
## hp drat wt qsec vs am gear carb
## 6.015788 3.111501 6.051127 5.918682 4.270956 4.285815 4.690187 4.290468
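# Dropping disp raises adjusted R-squared from 0.8066 to 0.8105, and dropping
# cyl as well raises it to 0.8187 while bringing every VIF below 10, so both
# exclusions improve the fit. A compact side-by-side comparison (a sketch;
# lower AIC is better):
fits <- list(full = mpg_fit, no_disp = mpg1, no_disp_cyl = mpg2)
data.frame(
  adj_r2 = sapply(fits, function(f) summary(f)$adj.r.squared),
  AIC = sapply(fits, AIC)
)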
#(a). Fit a multiple regression model to predict Sales using Price, Urban and US.
#install.packages("ISLR2")
library(ISLR2)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.4 ✔ rsample 1.1.1
## ✔ dials 1.2.0 ✔ tibble 3.1.8
## ✔ dplyr 1.1.0 ✔ tidyr 1.3.0
## ✔ infer 1.0.4 ✔ tune 1.1.1
## ✔ modeldata 1.1.0 ✔ workflows 1.1.3
## ✔ parsnip 1.1.0 ✔ workflowsets 1.0.1
## ✔ purrr 1.0.1 ✔ yardstick 1.1.0
## ✔ recipes 1.0.5
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ scales::alpha() masks ggplot2::alpha(), psych::alpha()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ✖ recipes::step() masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.
set.seed(123)
split <- initial_split(Carseats,prop=0.8, strata= Sales)
train <- training(split)
test <- testing(split)
split
## <Training/Testing/Total>
## <319/81/400>
train
## Sales CompPrice Income Advertising Population Price ShelveLoc Age Education
## 13 3.98 122 35 2 393 136 Medium 62 18
## 23 5.08 128 46 6 497 138 Medium 42 13
## 28 5.27 98 118 0 19 107 Medium 64 17
## 29 2.99 103 74 0 359 97 Bad 55 11
## 35 2.67 115 54 0 406 128 Medium 42 17
## 38 4.95 121 41 5 412 110 Medium 54 10
## 40 3.24 130 60 0 144 138 Bad 38 10
## 45 4.16 85 79 6 325 95 Medium 69 13
## 46 4.56 141 63 0 168 135 Bad 44 12
## 48 4.38 126 98 0 173 108 Bad 55 16
## 49 3.91 116 52 0 349 98 Bad 69 18
## 51 1.42 99 32 18 341 108 Bad 80 16
## 52 4.42 121 90 0 150 108 Bad 75 16
## 55 4.90 134 103 13 25 144 Medium 76 17
## 58 0.91 93 91 0 22 117 Bad 75 11
## 60 5.21 118 71 4 148 114 Medium 80 13
## 63 1.82 139 45 0 146 133 Bad 77 17
## 66 4.90 122 26 0 197 128 Medium 55 13
## 79 4.43 134 48 1 139 145 Medium 65 12
## 85 2.23 111 25 0 52 121 Bad 43 18
## 91 5.33 115 22 0 491 103 Medium 64 11
## 92 4.81 97 46 11 267 107 Medium 80 15
## 100 4.88 121 47 3 220 107 Bad 56 16
## 101 4.11 113 69 11 94 106 Medium 76 12
## 103 5.30 113 22 0 57 97 Medium 65 16
## 104 5.07 123 91 0 334 96 Bad 78 17
## 105 4.62 121 96 0 472 138 Medium 51 12
## 107 0.16 102 33 0 217 139 Medium 70 18
## 117 5.08 135 75 0 202 128 Medium 80 10
## 130 4.47 143 120 7 279 147 Bad 40 10
## 135 3.67 132 31 0 327 131 Medium 76 16
## 137 5.17 131 75 0 10 120 Bad 31 18
## 144 0.53 122 88 7 36 159 Bad 28 17
## 147 3.90 114 83 0 412 131 Bad 39 14
## 161 4.67 111 28 0 486 111 Medium 29 12
## 166 0.37 147 58 7 100 191 Bad 27 15
## 175 0.00 139 24 0 358 185 Medium 79 15
## 183 4.74 137 60 4 230 140 Bad 25 13
## 184 5.32 118 74 6 426 102 Medium 80 18
## 196 4.19 117 93 4 420 112 Bad 66 11
## 198 2.52 124 61 0 333 138 Medium 76 16
## 199 3.62 112 80 5 500 128 Medium 69 10
## 207 4.97 162 67 0 27 160 Medium 77 17
## 210 3.02 98 21 11 326 90 Bad 76 11
## 215 4.83 115 115 3 48 107 Medium 73 18
## 216 2.34 116 83 15 170 144 Bad 71 11
## 218 4.34 106 44 0 481 111 Medium 70 14
## 224 3.45 110 45 9 276 125 Medium 62 14
## 225 4.10 134 82 0 464 141 Medium 48 13
## 231 5.16 115 60 0 119 114 Bad 38 14
## 240 3.89 123 105 0 149 118 Bad 62 16
## 243 4.68 124 46 0 199 135 Medium 52 14
## 249 5.36 111 52 0 12 101 Medium 61 11
## 250 5.05 125 67 0 86 117 Bad 65 11
## 252 3.72 139 111 5 310 132 Bad 62 13
## 257 4.20 147 40 0 277 144 Medium 73 10
## 259 3.47 108 38 0 251 81 Bad 72 14
## 266 5.31 130 35 10 402 129 Bad 39 17
## 270 5.01 159 69 0 438 166 Medium 46 17
## 272 4.55 111 56 0 504 110 Medium 62 16
## 280 3.42 141 57 13 376 158 Medium 64 18
## 281 2.86 121 86 10 496 145 Bad 51 10
## 284 5.36 135 110 0 112 117 Medium 80 16
## 298 3.07 118 83 13 276 104 Bad 75 10
## 303 5.28 108 77 13 388 110 Bad 74 14
## 307 4.78 131 32 1 85 133 Medium 48 12
## 325 2.66 136 65 4 133 150 Bad 53 13
## 327 4.69 133 30 0 152 122 Medium 53 17
## 329 3.15 117 66 1 65 111 Bad 55 11
## 337 5.17 138 35 6 60 143 Bad 28 18
## 346 4.81 121 68 0 279 149 Good 79 12
## 355 5.30 133 31 1 80 145 Medium 42 18
## 357 3.58 142 109 0 111 164 Good 72 12
## 359 4.17 123 96 10 71 118 Bad 69 11
## 360 3.13 130 62 11 396 130 Bad 66 14
## 363 5.25 131 55 0 26 110 Bad 79 12
## 382 3.90 124 65 21 496 151 Bad 77 13
## 383 4.95 121 28 19 315 121 Medium 66 14
## 393 4.53 129 42 13 315 130 Bad 34 13
## 395 5.35 130 58 19 366 139 Bad 33 16
## 4 7.40 117 100 4 466 97 Medium 55 14
## 9 6.54 132 110 0 108 124 Medium 76 10
## 21 6.41 125 90 2 367 131 Medium 35 18
## 24 5.87 121 31 0 292 109 Medium 79 10
## 39 6.59 109 73 0 454 102 Medium 65 15
## 54 6.92 109 64 13 39 119 Medium 61 17
## 56 6.85 143 81 5 60 154 Medium 61 18
## 59 5.42 103 93 15 188 103 Bad 74 16
## 62 7.32 105 32 0 358 107 Medium 26 13
## 72 6.50 148 51 16 148 150 Medium 58 17
## 73 5.52 115 45 0 432 116 Medium 25 15
## 75 6.20 150 68 5 125 136 Medium 64 13
## 96 5.58 134 25 10 237 148 Medium 59 13
## 98 7.45 161 82 5 287 129 Bad 33 16
## 106 5.55 104 100 8 398 97 Medium 61 11
## 112 6.62 132 118 12 272 151 Medium 43 14
## 113 6.67 116 99 5 298 125 Good 62 12
## 114 6.01 131 29 11 335 127 Bad 33 12
## 120 7.37 130 94 8 137 128 Medium 64 12
## 121 6.87 128 105 11 249 131 Medium 63 13
## 123 6.88 119 100 5 45 108 Medium 75 10
## 128 6.52 125 48 3 192 116 Medium 51 14
## 132 6.50 108 69 3 208 94 Medium 77 16
## 138 6.52 128 42 0 436 118 Medium 80 11
## 141 6.03 133 60 10 277 129 Medium 45 18
## 142 6.53 140 42 0 331 131 Bad 28 15
## 143 7.44 124 84 0 300 104 Medium 77 15
## 155 6.89 129 69 10 289 110 Medium 50 16
## 157 7.49 146 34 0 220 157 Good 51 16
## 164 5.68 130 64 0 40 106 Bad 39 17
## 167 6.71 119 67 17 151 137 Medium 55 11
## 169 7.30 129 89 0 425 117 Medium 45 10
## 174 6.38 135 91 5 207 128 Medium 66 18
## 182 7.43 121 83 0 79 91 Medium 68 11
## 188 6.03 117 32 0 142 96 Bad 62 17
## 192 6.67 156 42 13 170 173 Good 74 14
## 200 6.42 122 88 5 335 126 Medium 64 14
## 201 5.56 144 92 0 349 146 Medium 62 12
## 202 5.94 138 83 0 139 134 Medium 54 18
## 206 5.68 113 22 1 317 132 Medium 28 12
## 217 5.73 141 33 0 243 144 Medium 34 17
## 222 6.43 124 44 0 125 107 Medium 80 11
## 229 5.40 149 73 13 381 163 Bad 26 11
## 236 5.53 126 32 8 95 132 Medium 50 17
## 239 7.36 121 24 0 200 133 Good 73 13
## 247 6.90 120 56 20 266 90 Bad 78 18
## 254 5.64 124 24 5 288 122 Medium 57 12
## 265 6.95 128 29 5 324 159 Good 31 15
## 268 5.83 134 82 7 473 112 Bad 51 12
## 275 7.22 135 93 2 67 119 Medium 34 11
## 277 6.93 135 69 14 296 130 Medium 73 15
## 279 7.22 114 113 2 129 151 Good 40 15
## 288 6.88 95 44 4 208 72 Bad 44 17
## 289 6.98 116 40 0 74 97 Medium 76 15
## 292 6.64 118 70 0 106 89 Bad 39 17
## 302 7.41 99 93 0 198 87 Medium 57 16
## 308 5.90 138 92 0 13 120 Bad 61 12
## 312 6.15 146 68 12 328 132 Bad 51 14
## 313 6.80 137 117 5 337 135 Bad 38 10
## 316 6.39 131 21 8 220 171 Good 29 14
## 320 6.97 127 45 19 459 129 Medium 57 11
## 321 5.86 136 70 12 171 152 Medium 44 18
## 328 6.23 112 38 17 316 104 Medium 80 16
## 333 5.74 106 33 20 354 104 Medium 61 12
## 334 5.87 136 60 7 303 147 Medium 41 10
## 336 6.18 120 70 15 464 110 Medium 72 15
## 339 5.97 112 24 0 164 101 Medium 45 11
## 342 7.38 98 120 0 268 93 Medium 72 10
## 344 5.99 117 42 10 371 121 Bad 26 14
## 356 7.02 130 100 0 306 146 Good 42 11
## 366 6.53 154 30 0 122 162 Medium 57 17
## 374 5.58 137 71 0 402 116 Medium 78 17
## 378 6.81 132 61 0 263 125 Medium 41 12
## 379 6.11 133 88 3 105 119 Medium 79 12
## 380 5.81 125 111 0 404 107 Bad 54 15
## 386 5.87 131 73 13 455 132 Medium 62 17
## 391 5.47 108 75 9 61 111 Medium 67 12
## 392 6.10 153 63 0 49 124 Bad 56 16
## 394 5.57 109 51 10 26 120 Medium 30 17
## 399 5.94 100 79 7 284 95 Bad 50 12
## 11 9.01 121 78 9 150 100 Bad 26 10
## 16 8.71 149 95 5 400 144 Medium 76 18
## 17 7.58 118 32 0 284 110 Good 63 13
## 20 8.73 129 76 16 58 121 Medium 69 12
## 27 8.33 107 115 11 496 131 Good 50 11
## 30 7.81 104 99 15 226 102 Bad 58 17
## 32 8.25 136 58 16 241 131 Medium 44 18
## 37 8.89 122 76 0 270 100 Good 60 18
## 42 7.96 157 53 0 403 124 Bad 58 16
## 53 7.91 153 40 3 112 129 Bad 39 18
## 65 7.80 100 67 12 184 104 Medium 32 16
## 68 9.01 126 61 14 152 115 Medium 47 16
## 70 7.99 127 59 0 339 99 Medium 65 12
## 80 9.14 134 67 0 286 90 Bad 41 13
## 81 8.01 113 100 16 353 79 Bad 68 11
## 86 8.47 125 103 0 304 112 Medium 49 13
## 87 8.70 150 84 9 432 134 Medium 64 15
## 90 7.95 128 66 3 493 119 Medium 45 16
## 94 8.86 145 30 0 67 104 Medium 55 17
## 95 8.39 115 97 5 134 84 Bad 55 11
## 110 8.98 115 65 0 217 90 Medium 60 17
## 111 9.00 128 62 7 125 116 Medium 43 14
## 115 9.31 122 87 9 17 106 Medium 65 13
## 116 8.54 139 35 0 95 129 Medium 42 13
## 118 8.80 145 53 0 507 119 Medium 41 12
## 125 8.87 131 113 0 181 120 Good 63 14
## 131 8.41 94 84 13 497 77 Medium 51 12
## 146 8.77 144 63 11 27 117 Medium 47 17
## 149 7.56 110 119 0 384 97 Medium 72 14
## 153 7.64 128 78 0 341 128 Good 45 13
## 156 7.71 98 72 0 59 69 Medium 65 16
## 160 9.32 119 60 0 372 70 Bad 30 18
## 165 8.22 148 64 0 58 141 Medium 27 13
## 171 8.01 128 39 12 356 118 Medium 71 10
## 173 9.03 104 102 13 123 110 Good 35 16
## 176 7.54 115 89 0 38 122 Medium 25 12
## 180 7.78 144 25 3 70 116 Medium 77 18
## 187 8.68 120 51 0 93 86 Medium 46 17
## 191 8.79 130 37 13 297 101 Medium 37 13
## 193 7.56 108 26 0 408 93 Medium 56 14
## 205 8.74 155 80 0 237 124 Medium 37 14
## 209 7.78 86 54 0 497 64 Bad 33 12
## 214 8.23 149 84 5 220 139 Medium 33 10
## 227 7.80 119 33 0 245 122 Good 56 14
## 228 8.69 113 64 10 68 101 Medium 57 16
## 232 8.09 132 69 0 123 122 Medium 27 11
## 234 8.65 123 76 18 218 120 Medium 29 14
## 237 9.32 141 34 16 361 108 Medium 69 10
## 244 7.82 124 25 13 87 110 Medium 57 10
## 245 8.78 130 30 0 391 100 Medium 26 18
## 253 8.31 133 97 0 70 117 Medium 32 16
## 256 7.71 123 81 8 198 81 Bad 80 15
## 258 8.67 125 62 14 477 112 Medium 80 13
## 261 7.67 129 117 8 400 101 Bad 36 10
## 264 7.77 116 26 6 434 115 Medium 25 17
## 278 7.80 136 48 12 326 125 Medium 36 16
## 283 7.74 150 96 0 80 154 Good 61 11
## 286 7.60 146 26 11 261 131 Medium 39 10
## 290 8.75 143 77 25 448 156 Medium 43 17
## 297 8.21 127 44 13 160 123 Good 63 18
## 301 8.57 116 78 1 158 99 Medium 45 11
## 306 8.03 115 29 26 394 132 Medium 33 13
## 309 9.24 126 80 19 436 126 Medium 52 10
## 315 7.72 133 33 10 333 129 Good 71 14
## 322 7.52 123 39 5 499 98 Medium 34 15
## 323 9.16 140 50 10 300 139 Good 60 15
## 335 7.63 93 117 9 489 83 Bad 42 13
## 338 8.61 130 38 0 283 102 Medium 80 15
## 343 7.81 137 102 13 422 118 Medium 71 10
## 345 8.43 138 80 0 108 126 Good 70 13
## 347 8.97 132 107 0 144 125 Medium 33 13
## 351 8.64 111 101 17 266 91 Medium 63 17
## 361 8.77 118 86 7 265 114 Good 52 15
## 362 8.68 131 25 10 183 104 Medium 56 15
## 371 7.68 126 41 22 403 119 Bad 42 12
## 372 9.08 152 81 0 191 126 Medium 54 16
## 373 7.80 121 50 0 508 98 Medium 65 11
## 376 7.90 132 46 4 206 124 Medium 73 11
## 388 8.67 142 73 14 238 115 Medium 73 14
## 389 8.14 135 89 11 245 78 Bad 79 16
## 2 11.22 111 48 16 260 83 Good 65 10
## 3 10.06 113 35 10 269 80 Medium 59 12
## 8 11.85 136 81 15 425 120 Good 67 10
## 12 11.96 117 94 4 503 94 Good 50 13
## 15 11.17 107 117 11 148 118 Good 52 18
## 18 12.29 147 74 13 251 131 Good 52 10
## 25 10.14 145 119 16 294 113 Bad 42 12
## 26 14.90 139 32 0 176 82 Good 54 11
## 36 11.07 131 84 11 29 96 Medium 44 17
## 43 10.43 77 69 0 25 24 Medium 50 18
## 47 12.44 127 90 14 16 70 Medium 48 15
## 50 10.61 157 93 0 51 149 Good 32 17
## 57 11.91 133 82 0 54 84 Medium 50 17
## 71 9.46 89 81 15 237 99 Good 74 12
## 74 12.61 118 90 10 54 104 Good 31 11
## 77 10.64 102 87 10 346 70 Medium 64 15
## 83 11.62 151 83 4 325 139 Good 28 17
## 88 11.70 131 67 7 272 126 Good 54 16
## 97 9.48 147 42 10 407 132 Good 73 16
## 99 12.49 122 77 24 382 127 Good 36 16
## 122 11.67 125 89 10 380 87 Bad 28 10
## 126 9.34 89 78 0 181 49 Medium 43 15
## 127 11.27 153 68 2 60 133 Good 59 16
## 133 9.54 125 87 9 232 136 Good 72 10
## 140 12.30 146 62 10 310 94 Medium 30 13
## 148 10.51 140 54 9 402 119 Good 41 16
## 150 11.48 121 120 13 140 87 Medium 56 11
## 151 10.49 122 84 8 176 114 Good 57 10
## 152 10.77 111 58 17 407 103 Good 75 17
## 158 10.21 121 58 8 249 90 Medium 48 13
## 159 12.53 142 90 1 189 112 Good 39 10
## 172 12.49 93 106 12 416 55 Medium 75 15
## 178 10.48 138 72 0 148 94 Medium 27 17
## 185 9.95 132 33 7 35 97 Medium 60 11
## 186 10.07 130 100 11 449 107 Medium 64 10
## 194 13.28 139 70 7 71 96 Good 61 10
## 212 9.39 117 118 14 445 120 Medium 32 15
## 213 12.04 145 69 19 501 105 Medium 45 11
## 219 9.70 138 61 12 156 120 Medium 25 14
## 220 10.62 116 79 19 359 116 Good 58 17
## 221 10.59 131 120 15 262 124 Medium 30 10
## 230 11.19 98 104 0 404 72 Medium 27 18
## 235 9.43 115 62 11 289 129 Good 56 16
## 238 9.62 151 28 8 499 135 Medium 48 10
## 241 10.31 159 80 0 362 121 Medium 26 18
## 246 10.00 114 43 0 199 88 Good 57 10
## 255 9.58 108 104 23 353 129 Good 37 17
## 271 11.99 119 26 0 284 89 Good 26 10
## 273 12.98 113 33 0 14 63 Good 38 12
## 274 10.04 116 106 8 244 86 Medium 58 12
## 282 11.19 122 69 7 303 105 Good 45 16
## 291 9.49 107 111 14 400 103 Medium 41 11
## 294 11.28 123 84 0 74 89 Good 59 10
## 299 10.98 148 63 0 312 130 Good 63 15
## 304 10.01 133 52 16 290 99 Medium 43 11
## 310 11.18 131 111 13 33 80 Bad 68 18
## 311 9.53 175 65 29 419 166 Medium 53 12
## 314 9.33 103 81 3 491 54 Medium 66 13
## 317 15.63 122 36 5 369 72 Good 35 10
## 319 10.08 116 72 10 456 130 Good 41 14
## 324 10.36 107 105 18 428 103 Medium 34 12
## 326 11.70 144 69 11 131 104 Medium 47 11
## 332 10.10 135 63 15 213 134 Medium 32 10
## 340 11.54 134 44 4 219 126 Good 44 15
## 349 12.57 132 102 20 459 107 Good 49 11
## 352 10.44 124 115 16 458 105 Medium 62 16
## 353 13.44 133 103 14 288 122 Good 61 17
## 354 9.45 107 67 12 430 92 Medium 35 12
## 358 13.36 103 73 3 276 72 Medium 34 15
## 364 10.26 111 75 1 377 108 Good 25 12
## 365 10.50 122 21 16 488 131 Good 30 14
## 368 14.37 95 106 0 256 53 Good 52 17
## 369 10.71 109 22 10 348 79 Good 74 14
## 370 10.26 135 100 22 463 122 Medium 36 14
## 375 9.44 131 47 7 90 118 Medium 47 12
## 377 16.27 141 60 19 319 92 Good 44 11
## 381 9.64 106 64 10 17 89 Medium 68 17
## 385 12.85 123 37 15 348 112 Good 28 12
## 400 9.71 134 37 0 27 120 Good 49 16
## Urban US
## 13 Yes No
## 23 Yes No
## 28 Yes No
## 29 Yes Yes
## 35 Yes Yes
## 38 Yes Yes
## 40 No No
## 45 Yes Yes
## 46 Yes Yes
## 48 Yes No
## 49 Yes No
## 51 Yes Yes
## 52 Yes No
## 55 No Yes
## 58 Yes No
## 60 Yes No
## 63 Yes Yes
## 66 No No
## 79 Yes Yes
## 85 No No
## 91 No No
## 92 Yes Yes
## 100 No Yes
## 101 No Yes
## 103 No No
## 104 Yes Yes
## 105 Yes No
## 107 No No
## 117 No No
## 130 No Yes
## 135 Yes No
## 137 No No
## 144 Yes Yes
## 147 Yes No
## 161 No No
## 166 Yes Yes
## 175 No No
## 183 Yes No
## 184 Yes Yes
## 196 Yes Yes
## 198 Yes No
## 199 Yes Yes
## 207 Yes Yes
## 210 No Yes
## 215 Yes Yes
## 216 Yes Yes
## 218 No No
## 224 Yes Yes
## 225 No No
## 231 No No
## 240 Yes Yes
## 243 No No
## 249 Yes Yes
## 250 Yes No
## 252 Yes Yes
## 257 Yes No
## 259 No No
## 266 Yes Yes
## 270 Yes No
## 272 Yes No
## 280 Yes Yes
## 281 Yes Yes
## 284 No No
## 298 Yes Yes
## 303 Yes Yes
## 307 Yes Yes
## 325 Yes Yes
## 327 Yes No
## 329 Yes Yes
## 337 Yes No
## 346 Yes No
## 355 Yes Yes
## 357 Yes No
## 359 Yes Yes
## 360 Yes Yes
## 363 Yes Yes
## 382 Yes Yes
## 383 Yes Yes
## 393 Yes Yes
## 395 Yes Yes
## 4 Yes Yes
## 9 No No
## 21 Yes Yes
## 24 Yes No
## 39 Yes No
## 54 Yes Yes
## 56 Yes Yes
## 59 Yes Yes
## 62 No No
## 72 No Yes
## 73 Yes No
## 75 No Yes
## 96 Yes Yes
## 98 Yes Yes
## 106 Yes Yes
## 112 Yes Yes
## 113 Yes Yes
## 114 Yes Yes
## 120 Yes Yes
## 121 Yes Yes
## 123 Yes Yes
## 128 Yes Yes
## 132 Yes No
## 138 Yes No
## 141 Yes Yes
## 142 Yes No
## 143 Yes No
## 155 No Yes
## 157 Yes No
## 164 No No
## 167 Yes Yes
## 169 Yes No
## 174 Yes Yes
## 182 Yes No
## 188 Yes No
## 192 Yes Yes
## 200 Yes Yes
## 201 No No
## 202 Yes No
## 206 Yes No
## 217 Yes No
## 222 Yes No
## 229 No Yes
## 236 Yes Yes
## 239 Yes No
## 247 Yes Yes
## 254 No Yes
## 265 Yes Yes
## 268 No Yes
## 275 Yes Yes
## 277 Yes Yes
## 279 No Yes
## 288 Yes Yes
## 289 No No
## 292 Yes No
## 302 Yes Yes
## 308 Yes No
## 312 Yes Yes
## 313 Yes Yes
## 316 Yes Yes
## 320 No Yes
## 321 Yes Yes
## 328 Yes Yes
## 333 Yes Yes
## 334 Yes Yes
## 336 Yes Yes
## 339 Yes No
## 342 No No
## 344 Yes Yes
## 356 Yes No
## 366 No No
## 374 Yes No
## 378 No No
## 379 Yes Yes
## 380 Yes No
## 386 Yes Yes
## 391 Yes Yes
## 392 Yes No
## 394 No Yes
## 399 Yes Yes
## 11 No Yes
## 16 No No
## 17 Yes No
## 20 Yes Yes
## 27 No Yes
## 30 Yes Yes
## 32 Yes Yes
## 37 No No
## 42 Yes No
## 53 Yes Yes
## 65 No Yes
## 68 Yes Yes
## 70 Yes No
## 80 Yes No
## 81 Yes Yes
## 86 No No
## 87 Yes No
## 90 No No
## 94 Yes No
## 95 Yes Yes
## 110 No No
## 111 Yes Yes
## 115 Yes Yes
## 116 Yes No
## 118 Yes No
## 125 Yes No
## 131 Yes Yes
## 146 Yes Yes
## 149 No Yes
## 153 No No
## 156 Yes No
## 160 No No
## 165 No Yes
## 171 Yes Yes
## 173 Yes Yes
## 176 Yes No
## 180 Yes Yes
## 187 No No
## 191 No Yes
## 193 No No
## 205 Yes No
## 209 Yes No
## 214 Yes Yes
## 227 Yes No
## 228 Yes Yes
## 232 No No
## 234 No Yes
## 237 Yes Yes
## 244 Yes Yes
## 245 Yes No
## 253 Yes No
## 256 Yes Yes
## 258 Yes Yes
## 261 Yes Yes
## 264 Yes Yes
## 278 Yes Yes
## 283 Yes No
## 286 Yes Yes
## 290 Yes Yes
## 297 Yes Yes
## 301 Yes Yes
## 306 Yes Yes
## 309 Yes Yes
## 315 Yes Yes
## 322 Yes No
## 323 Yes Yes
## 335 Yes Yes
## 338 Yes No
## 343 No Yes
## 345 No Yes
## 347 No No
## 351 No Yes
## 361 No Yes
## 362 No Yes
## 371 Yes Yes
## 372 Yes No
## 373 No No
## 376 Yes No
## 388 No Yes
## 389 Yes Yes
## 2 Yes Yes
## 3 Yes Yes
## 8 Yes Yes
## 12 Yes Yes
## 15 Yes Yes
## 18 Yes Yes
## 25 Yes Yes
## 26 No No
## 36 No Yes
## 43 Yes No
## 47 No Yes
## 50 Yes No
## 57 Yes No
## 71 Yes Yes
## 74 No Yes
## 77 Yes Yes
## 83 Yes Yes
## 88 No Yes
## 97 No Yes
## 99 No Yes
## 122 Yes Yes
## 126 No No
## 127 Yes Yes
## 133 Yes Yes
## 140 No Yes
## 148 No Yes
## 150 Yes Yes
## 151 No Yes
## 152 No Yes
## 158 No Yes
## 159 No Yes
## 172 Yes Yes
## 178 Yes Yes
## 185 No Yes
## 186 Yes Yes
## 194 Yes Yes
## 212 Yes Yes
## 213 Yes Yes
## 219 Yes Yes
## 220 Yes Yes
## 221 Yes Yes
## 230 No No
## 235 No Yes
## 238 Yes Yes
## 241 Yes No
## 246 No Yes
## 255 Yes Yes
## 271 Yes No
## 273 Yes No
## 274 Yes Yes
## 282 No Yes
## 291 No Yes
## 294 Yes No
## 299 Yes No
## 304 Yes Yes
## 310 Yes Yes
## 311 Yes Yes
## 314 Yes No
## 317 Yes Yes
## 319 No Yes
## 324 Yes Yes
## 326 Yes Yes
## 332 Yes Yes
## 340 Yes Yes
## 349 Yes Yes
## 352 No Yes
## 353 Yes Yes
## 354 No Yes
## 358 Yes Yes
## 364 Yes No
## 365 Yes Yes
## 368 Yes No
## 369 No Yes
## 370 Yes Yes
## 375 Yes Yes
## 377 Yes Yes
## 381 Yes Yes
## 385 Yes Yes
## 400 Yes Yes
test
## Sales CompPrice Income Advertising Population Price ShelveLoc Age Education
## 1 9.50 138 73 11 276 120 Bad 42 17
## 5 4.15 141 64 3 340 128 Bad 38 13
## 6 10.81 124 113 13 501 72 Bad 78 16
## 7 6.63 115 105 0 45 108 Medium 71 15
## 10 4.69 132 113 0 131 124 Medium 76 17
## 14 10.96 115 28 11 29 86 Good 53 18
## 19 13.91 110 110 0 408 68 Good 46 17
## 22 12.13 134 29 12 239 109 Good 62 18
## 31 13.55 125 94 0 447 89 Good 30 12
## 33 6.20 107 32 12 236 137 Good 64 10
## 34 8.77 114 38 13 317 128 Good 50 16
## 41 2.07 119 98 0 18 126 Bad 73 17
## 44 4.12 123 42 11 16 134 Medium 59 13
## 61 8.32 122 102 19 469 123 Bad 29 13
## 64 8.47 119 88 10 170 101 Medium 61 13
## 67 8.85 127 92 0 508 91 Medium 56 18
## 69 13.39 149 69 20 366 134 Good 60 13
## 76 8.55 88 111 23 480 92 Bad 36 16
## 78 7.70 118 71 12 44 89 Medium 67 18
## 82 7.52 116 72 0 237 128 Good 70 13
## 84 4.42 109 36 7 468 94 Bad 56 11
## 89 6.56 117 42 7 144 111 Medium 62 10
## 93 4.53 114 113 0 97 125 Medium 29 12
## 102 6.20 128 93 0 89 118 Medium 34 18
## 108 8.55 134 107 0 104 108 Medium 60 12
## 109 3.47 107 79 2 488 103 Bad 65 16
## 119 7.57 112 88 2 243 99 Medium 62 11
## 124 8.19 127 103 0 125 155 Good 29 15
## 129 4.96 133 100 3 350 126 Bad 55 13
## 134 7.62 132 98 2 265 97 Bad 62 12
## 136 6.44 96 94 14 384 120 Medium 36 18
## 139 10.27 125 103 12 371 109 Medium 44 10
## 145 9.09 132 68 0 264 123 Good 34 11
## 154 5.93 150 36 7 488 150 Medium 25 17
## 162 2.93 143 21 5 81 160 Medium 67 12
## 163 3.63 122 74 0 424 149 Medium 51 13
## 168 6.71 106 73 0 216 93 Medium 60 13
## 170 11.48 104 41 15 492 77 Good 73 18
## 177 5.61 138 107 9 480 154 Medium 47 11
## 179 10.66 104 71 14 89 81 Medium 25 14
## 181 4.94 137 112 15 434 149 Bad 66 13
## 189 8.07 116 37 0 426 90 Medium 76 15
## 190 12.11 118 117 18 509 104 Medium 26 15
## 195 7.23 112 98 18 481 128 Medium 45 11
## 197 4.10 130 28 6 410 133 Bad 72 16
## 203 4.10 121 78 4 413 130 Bad 46 10
## 204 2.05 131 82 0 132 157 Bad 25 14
## 208 8.19 111 105 0 466 97 Bad 61 10
## 211 4.36 125 41 2 357 123 Bad 47 14
## 223 7.49 136 119 6 178 145 Medium 35 13
## 226 6.68 107 25 0 412 82 Bad 36 14
## 233 13.14 137 80 10 24 105 Good 61 15
## 242 12.01 136 63 0 160 94 Medium 38 12
## 248 5.04 123 114 0 298 151 Bad 34 16
## 251 9.16 137 105 10 435 156 Good 72 14
## 260 5.12 123 36 10 467 100 Bad 74 11
## 262 5.71 121 42 4 188 118 Medium 54 15
## 263 6.37 120 77 15 86 132 Medium 48 18
## 267 9.10 128 93 12 343 112 Good 73 17
## 269 6.53 123 57 0 66 105 Medium 39 11
## 276 6.67 107 119 11 210 132 Medium 53 11
## 285 6.97 106 46 11 414 96 Bad 79 17
## 287 7.53 117 118 11 429 113 Medium 67 18
## 293 11.82 113 66 16 322 74 Good 76 15
## 295 12.66 148 76 3 126 99 Good 60 11
## 296 4.21 118 35 14 502 137 Medium 79 10
## 300 9.40 135 40 17 497 96 Medium 54 17
## 305 11.93 123 98 12 408 134 Good 29 10
## 318 6.41 142 30 0 472 136 Good 80 15
## 330 11.27 100 54 9 433 89 Good 45 12
## 331 4.99 122 59 0 501 112 Bad 32 14
## 341 7.50 140 29 0 105 91 Bad 43 16
## 348 6.88 96 39 0 161 112 Good 27 14
## 350 9.32 134 27 18 467 96 Medium 49 14
## 367 5.98 124 56 11 447 134 Medium 53 12
## 384 9.35 98 117 0 76 68 Medium 63 10
## 387 5.32 152 116 0 170 160 Medium 39 16
## 390 8.44 128 42 8 328 107 Medium 35 12
## 396 12.57 138 108 17 203 128 Good 33 14
## 397 6.14 139 23 3 37 120 Medium 55 11
## 398 7.41 162 26 12 368 159 Medium 40 18
## Urban US
## 1 Yes Yes
## 5 Yes No
## 6 No Yes
## 7 Yes No
## 10 No Yes
## 14 Yes Yes
## 19 No Yes
## 22 No Yes
## 31 Yes No
## 33 No Yes
## 34 Yes Yes
## 41 No No
## 44 Yes Yes
## 61 Yes Yes
## 64 Yes Yes
## 67 Yes No
## 69 Yes Yes
## 76 No Yes
## 78 No Yes
## 82 Yes No
## 84 Yes Yes
## 89 Yes Yes
## 93 Yes No
## 102 Yes No
## 108 Yes No
## 109 Yes No
## 119 Yes Yes
## 124 No Yes
## 129 Yes Yes
## 134 Yes Yes
## 136 No Yes
## 139 Yes Yes
## 145 No No
## 154 No Yes
## 162 No Yes
## 163 Yes No
## 168 Yes No
## 170 Yes Yes
## 177 No Yes
## 179 No Yes
## 181 Yes Yes
## 189 Yes No
## 190 No Yes
## 195 Yes Yes
## 197 Yes Yes
## 203 No Yes
## 204 Yes No
## 208 No No
## 211 No Yes
## 223 Yes Yes
## 226 Yes No
## 233 Yes Yes
## 242 Yes No
## 248 Yes No
## 251 Yes Yes
## 260 No Yes
## 262 Yes Yes
## 263 Yes Yes
## 267 No Yes
## 269 Yes No
## 276 Yes Yes
## 285 No No
## 287 No Yes
## 293 Yes Yes
## 295 Yes Yes
## 296 No Yes
## 300 No Yes
## 305 Yes Yes
## 318 No No
## 330 Yes Yes
## 331 No No
## 341 Yes No
## 348 No No
## 350 No Yes
## 367 No Yes
## 384 Yes No
## 387 Yes No
## 390 Yes Yes
## 396 Yes Yes
## 397 No Yes
## 398 Yes Yes
model <- linear_reg() %>% set_engine("lm")
#Name the fit lm_fit rather than lm to avoid shadowing stats::lm()
lm_fit <- model %>% fit(Sales ~ Price + Urban + US, data = train)
summary(lm_fit$fit)
##
## Call:
## stats::lm(formula = Sales ~ Price + Urban + US, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7784 -1.6018 -0.0234 1.6380 7.2425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.938978 0.736088 17.578 < 2e-16 ***
## Price -0.051814 0.005931 -8.736 < 2e-16 ***
## UrbanYes -0.252232 0.316369 -0.797 0.425894
## USYes 1.107562 0.293321 3.776 0.000191 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.497 on 315 degrees of freedom
## Multiple R-squared: 0.2194, Adjusted R-squared: 0.212
## F-statistic: 29.52 on 3 and 315 DF, p-value: < 2.2e-16
#Predict on the held-out test data
pred <- test %>% select(Sales) %>% bind_cols(predict(lm_fit, new_data = test))
#Format the prediction results
colnames(pred) <- c("Actual_Sales", "Prediction_Sales")
result <- data.frame(pred)
result
## Actual_Sales Prediction_Sales
## 1 9.50 7.576673
## 5 4.15 6.054602
## 6 10.81 10.315959
## 7 6.63 7.090875
## 10 4.69 7.621650
## 14 10.96 9.338336
## 19 13.91 10.523213
## 22 12.13 8.398855
## 31 13.55 8.075333
## 33 6.20 6.948073
## 34 8.77 7.162164
## 41 2.07 6.410461
## 44 4.12 6.851282
## 61 8.32 7.421232
## 64 8.47 8.561132
## 67 8.85 7.971706
## 69 13.39 6.851282
## 76 8.55 9.279686
## 78 7.70 9.435127
## 82 7.52 6.054602
## 84 4.42 8.923827
## 89 6.56 8.042995
## 93 4.53 6.210043
## 102 6.20 6.572738
## 108 8.55 7.090875
## 109 3.47 7.349943
## 119 7.57 8.664759
## 124 8.19 6.015428
## 129 4.96 7.265791
## 134 7.62 8.768386
## 136 6.44 7.828905
## 139 10.27 8.146623
## 145 9.09 6.565902
## 154 5.93 6.274496
## 162 2.93 5.756360
## 163 3.63 4.966516
## 168 6.71 7.868079
## 170 11.48 9.804659
## 177 5.61 6.067242
## 179 10.66 9.849636
## 181 4.94 6.074078
## 189 8.07 8.023520
## 190 12.11 8.657923
## 195 7.23 7.162164
## 197 4.10 6.903096
## 203 4.10 7.310769
## 204 2.05 4.552007
## 208 8.19 7.913056
## 211 4.36 7.673464
## 223 7.49 6.281332
## 226 6.68 8.438029
## 233 13.14 8.353877
## 242 12.01 7.816265
## 248 5.04 4.862889
## 251 9.16 5.711382
## 260 5.12 8.865177
## 262 5.71 7.680300
## 263 6.37 6.954909
## 267 9.10 8.243414
## 269 6.53 7.246315
## 276 6.67 6.954909
## 285 6.97 7.964870
## 287 7.53 8.191600
## 293 11.82 9.960099
## 295 12.66 8.664759
## 296 4.21 6.948073
## 300 9.40 9.072432
## 305 11.93 6.851282
## 318 6.41 5.892325
## 330 11.27 9.182895
## 331 4.99 7.135852
## 341 7.50 7.971706
## 348 6.88 7.135852
## 350 9.32 9.072432
## 367 5.98 7.103514
## 384 9.35 9.163419
## 387 5.32 4.396566
## 390 8.44 8.250250
## 396 12.57 7.162164
## 397 6.14 7.828905
## 398 7.41 5.555942
#Evaluate the predictions
rmse_a <- rmse(pred, truth = Actual_Sales, estimate = Prediction_Sales)
rsq_a <- rsq(pred, truth = Actual_Sales, estimate = Prediction_Sales)
#Evaluation result
eval_a <- rbind(rmse_a, rsq_a)
eval_a
## # A tibble: 2 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 2.39
## 2 rsq standard 0.314
(b). Provide an interpretation of each coefficient in the model. Be careful: some of the variables in the model are qualitative!

Urban and US are qualitative (binary) predictors: Urban = 1 if a store is located in an urban area and 0 otherwise, and US = 1 if a store is located in the US and 0 otherwise.

Intercept: the estimate of 12.9390 is the expected Sales when Price = 0 for a store that is neither urban nor in the US; it is a baseline with little practical meaning on its own.

Price: the estimated coefficient is -0.0518. For every one-unit increase in Price, Sales is expected to decrease by about 0.052 units, holding all other variables constant. This effect is highly significant (p < 2e-16).

Urban: the estimated coefficient is -0.2522. Stores located in urban areas are expected to sell about 0.25 units less than stores in non-urban areas, holding all other variables constant. However, the p-value of 0.426 indicates that this effect is not statistically significant at the 0.05 level.

US: the estimated coefficient is 1.1076. Stores located in the US are expected to sell about 1.11 units more than stores outside the US, holding all other variables constant, and the p-value of 0.00019 indicates that this effect is statistically significant at the 0.05 level. The coefficient represents the difference in expected Sales between US and non-US stores when the other predictors are held fixed.

(c). Write out the model in equation form, being careful to handle the qualitative variables properly.

Sales = 12.9390 - 0.0518 * Price - 0.2522 * UrbanYes + 1.1076 * USYes

where UrbanYes = 1 if Urban = Yes (0 otherwise) and USYes = 1 if US = Yes (0 otherwise).
#(d). For which of the predictors can you reject the null hypothesis H0: beta_j = 0?
#We run the model with all predictors:
fit_all <- model %>% fit(Sales ~ ., data = train)
summary(fit_all$fit)
##
## Call:
## stats::lm(formula = Sales ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6936 -0.7194 0.0321 0.7002 3.3218
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.7465910 0.6717671 8.554 5.72e-16 ***
## CompPrice 0.0958758 0.0046215 20.745 < 2e-16 ***
## Income 0.0138907 0.0021414 6.487 3.51e-10 ***
## Advertising 0.1222171 0.0122055 10.013 < 2e-16 ***
## Population -0.0002724 0.0004241 -0.642 0.521
## Price -0.0973130 0.0030369 -32.044 < 2e-16 ***
## ShelveLocGood 4.9762295 0.1741229 28.579 < 2e-16 ***
## ShelveLocMedium 2.1242422 0.1416622 14.995 < 2e-16 ***
## Age -0.0482196 0.0035436 -13.608 < 2e-16 ***
## Education -0.0198925 0.0218967 -0.908 0.364
## UrbanYes 0.1224443 0.1292138 0.948 0.344
## USYes -0.1913763 0.1659768 -1.153 0.250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.009 on 307 degrees of freedom
## Multiple R-squared: 0.8758, Adjusted R-squared: 0.8714
## F-statistic: 196.8 on 11 and 307 DF, p-value: < 2.2e-16
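# Based on the p-values above, we can reject H0 for CompPrice, Income,
# Advertising, Price, ShelveLoc (both levels), and Age. We cannot reject H0 for
# Population, Education, Urban, or US.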
#(e). On the basis of your response to the previous question, fit a smaller model that only uses the predictors for which there is evidence of association with the outcome.
#Build model:
data1 <- model %>% fit(Sales ~ CompPrice + Income + Advertising + Price + ShelveLoc + Age, data = train)
summary(data1$fit)
##
## Call:
## stats::lm(formula = Sales ~ CompPrice + Income + Advertising +
## Price + ShelveLoc + Age, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5656 -0.7168 -0.0098 0.7051 3.2387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.371596 0.552951 9.714 < 2e-16 ***
## CompPrice 0.096378 0.004579 21.047 < 2e-16 ***
## Income 0.013815 0.002124 6.504 3.12e-10 ***
## Advertising 0.112813 0.008533 13.220 < 2e-16 ***
## Price -0.097468 0.003023 -32.242 < 2e-16 ***
## ShelveLocGood 4.964289 0.172590 28.764 < 2e-16 ***
## ShelveLocMedium 2.118822 0.139967 15.138 < 2e-16 ***
## Age -0.047803 0.003518 -13.587 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.007 on 311 degrees of freedom
## Multiple R-squared: 0.8747, Adjusted R-squared: 0.8718
## F-statistic: 310 on 7 and 311 DF, p-value: < 2.2e-16
#Predict on the held-out test data using the reduced model
predict_e <- test %>% select(Sales) %>% bind_cols(predict(data1,new_data=test))
#Format the prediction results
colnames(predict_e) <- c("Actual_Sales", "Prediction_Sales")
predict_result_e <- data.frame(predict_e)
predict_result_e
## Actual_Sales Prediction_Sales
## 1 9.50 7.217363
## 5 4.15 5.891126
## 6 10.81 9.603865
## 7 6.63 6.103979
## 10 4.69 6.054428
## 14 10.96 12.131338
## 19 13.91 13.630415
## 22 12.13 11.417162
## 31 13.55 13.573059
## 33 6.20 6.031703
## 34 8.77 8.448503
## 41 2.07 2.423936
## 44 4.12 5.285040
## 61 8.32 7.307499
## 64 8.47 8.543057
## 67 8.85 9.454907
## 69 13.39 11.976861
## 76 8.55 7.293121
## 78 7.70 9.320238
## 82 7.52 6.688363
## 84 4.42 5.324904
## 89 6.56 6.353872
## 93 4.53 6.468892
## 102 6.20 7.985133
## 108 8.55 8.488621
## 109 3.47 3.854714
## 119 7.57 7.113043
## 124 8.19 7.505085
## 129 4.96 4.999746
## 134 7.62 7.254869
## 136 6.44 6.203692
## 139 10.27 9.587087
## 145 9.09 10.383388
## 154 5.93 7.418913
## 162 2.93 3.329014
## 163 3.63 3.310224
## 168 6.71 6.782331
## 170 11.48 11.623185
## 177 5.61 6.027366
## 179 10.66 10.984033
## 181 4.94 4.137205
## 189 8.07 6.776312
## 190 12.11 11.130524
## 195 7.23 7.042283
## 197 4.10 2.559416
## 203 4.10 3.692439
## 204 2.05 2.632459
## 208 8.19 5.149819
## 211 4.36 3.975625
## 223 7.49 7.112802
## 226 6.68 6.316159
## 233 13.14 12.622932
## 242 12.01 10.489708
## 248 5.04 2.458111
## 251 9.16 7.471632
## 260 5.12 5.567375
## 262 5.71 6.101094
## 263 6.37 6.651465
## 267 9.10 10.904849
## 269 6.53 8.033955
## 276 6.67 5.288536
## 285 6.97 4.330774
## 287 7.53 7.421147
## 293 11.82 13.097780
## 295 12.66 13.470748
## 296 4.21 3.796421
## 300 9.40 11.033609
## 305 11.93 10.451071
## 318 6.41 7.356169
## 330 11.27 10.909265
## 331 4.99 5.498732
## 341 7.50 8.340062
## 348 6.88 7.919901
## 350 9.32 11.109457
## 367 5.98 5.861652
## 384 9.35 8.912473
## 387 5.32 6.283298
## 390 8.44 9.207389
## 396 12.57 12.992552
## 397 6.14 7.217852
## 398 7.41 7.407104
#Evaluate the prediction
rmse_e <- rmse(predict_e,
truth = Actual_Sales,
estimate = Prediction_Sales)
rsq_e <- rsq(predict_e,
truth = Actual_Sales,
estimate = Prediction_Sales)
#Evaluation result
evaluation <- rbind(rmse_e, rsq_e)
evaluation
## # A tibble: 2 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 1.09
## 2 rsq standard 0.859
#(f). How well do the models in (a) and (e) fit the data?
compare <- rbind(eval_a, evaluation)
compare
## # A tibble: 4 × 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 2.39
## 2 rsq standard 0.314
## 3 rmse standard 1.09
## 4 rsq standard 0.859
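# The reduced model from (e) fits far better than the Price/Urban/US model from
# (a): on the test set, RMSE falls from 2.39 to 1.09 and R-squared rises from
# 0.314 to 0.859.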
#(g). Using the model from (e), obtain 95% confidence intervals for the coefficient(s).
#Note: the code below binds 95% prediction intervals for the test observations,
#computed from the model in (a), to the predictions from (e). The coefficient
#confidence intervals that (g) actually asks for are obtained with confint()
#after the table.
pred_int_tbl <- predict_e %>% bind_cols(predict(lm_fit, new_data = test, type = "pred_int"))
#Format the table
colnames(pred_int_tbl) <- c("Actual_Sales", "Prediction_Sales", "Lower_Bound", "Upper_Bound")
pred_int_tbl <- data.frame(pred_int_tbl)
pred_int_tbl
## Actual_Sales Prediction_Sales Lower_Bound Upper_Bound
## 1 9.50 7.217363 2.64947683 12.503869
## 5 4.15 5.891126 1.11363551 10.995569
## 6 10.81 9.603865 5.34321260 15.288705
## 7 6.63 6.103979 2.15142138 12.030328
## 10 4.69 6.054428 2.67261591 12.570685
## 14 10.96 12.131338 4.39751751 14.279155
## 19 13.91 13.630415 5.54558279 15.500844
## 22 12.13 11.417162 3.45059965 13.347110
## 31 13.55 13.573059 3.12711158 13.023555
## 33 6.20 6.031703 1.99335877 11.902788
## 34 8.77 8.448503 2.23353463 12.090793
## 41 2.07 2.423936 1.45660462 11.364318
## 44 4.12 5.285040 1.92041820 11.782146
## 61 8.32 7.307499 2.49370572 12.348758
## 64 8.47 8.543057 3.63025232 13.492011
## 67 8.85 9.454907 3.02487457 12.918538
## 69 13.39 11.976861 1.92041820 11.782146
## 76 8.55 7.293121 4.32483354 14.234539
## 78 7.70 9.320238 4.47828678 14.391967
## 82 7.52 6.688363 1.11363551 10.995569
## 84 4.42 5.324904 3.98907953 13.858575
## 89 6.56 6.353872 3.11529777 12.970693
## 93 4.53 6.468892 1.27000624 11.150080
## 102 6.20 7.985133 1.63390682 11.511570
## 108 8.55 8.488621 2.15142138 12.030328
## 109 3.47 3.854714 2.40914501 12.290740
## 119 7.57 7.113043 3.73291213 13.596606
## 124 8.19 7.505085 1.04520955 10.985647
## 129 4.96 4.999746 2.33768593 12.193896
## 134 7.62 7.254869 3.83546170 13.701311
## 136 6.44 6.203692 2.88068364 12.777126
## 139 10.27 9.587087 3.21850957 13.074736
## 145 9.09 10.383388 1.61304759 11.518757
## 154 5.93 7.418913 1.30947078 11.239522
## 162 2.93 3.329014 0.78026953 10.732450
## 163 3.63 3.310224 0.01211396 9.920918
## 168 6.71 6.782331 2.92252781 12.813630
## 170 11.48 11.623185 4.85491403 14.754403
## 177 5.61 6.027366 1.09811617 11.036367
## 179 10.66 10.984033 4.88629196 14.812980
## 181 4.94 4.137205 1.13328867 11.014867
## 189 8.07 6.776312 3.07600678 12.971033
## 190 12.11 11.130524 3.70855170 13.607294
## 195 7.23 7.042283 2.23353463 12.090793
## 197 4.10 2.559416 1.97267325 11.833518
## 203 4.10 3.692439 2.35968936 12.261848
## 204 2.05 2.632459 -0.41068779 9.514702
## 208 8.19 5.149819 2.95852239 12.867590
## 211 4.36 3.975625 2.72467410 12.622254
## 223 7.49 7.112802 1.34379465 11.218870
## 226 6.68 6.316159 3.48407822 13.391979
## 233 13.14 12.622932 3.42460176 13.283153
## 242 12.01 10.489708 2.87131324 12.761217
## 248 5.04 2.458111 -0.09342293 9.819201
## 251 9.16 7.471632 0.76384856 10.658916
## 260 5.12 5.567375 3.91441839 13.815936
## 262 5.71 6.101094 2.75318605 12.607414
## 263 6.37 6.651465 2.02490072 11.884918
## 267 9.10 10.904849 3.29549826 13.191329
## 269 6.53 8.033955 2.30613821 12.186493
## 276 6.67 5.288536 2.02490072 11.884918
## 285 6.97 4.330774 3.00990039 12.919840
## 287 7.53 7.421147 3.24374276 13.139458
## 293 11.82 13.097780 5.00688823 14.913311
## 295 12.66 13.470748 3.73291213 13.596606
## 296 4.21 3.796421 1.99335877 11.902788
## 300 9.40 11.033609 4.11984551 14.025018
## 305 11.93 10.451071 1.92041820 11.782146
## 318 6.41 7.356169 0.93334426 10.851306
## 330 11.27 10.909265 4.24455920 14.121231
## 331 4.99 5.498732 2.18455536 12.087149
## 341 7.50 8.340062 3.02487457 12.918538
## 348 6.88 7.919901 2.18455536 12.087149
## 350 9.32 11.109457 4.11984551 14.025018
## 367 5.98 5.861652 2.15052223 12.056506
## 384 9.35 8.912473 4.19400146 14.132837
## 387 5.32 6.283298 -0.56968733 9.362820
## 390 8.44 9.207389 3.32161089 13.178889
## 396 12.57 12.992552 2.23353463 12.090793
## 397 6.14 7.217852 2.88068364 12.777126
## 398 7.41 7.407104 0.60510730 10.506776
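# The coefficient confidence intervals requested in (g) come from confint()
# applied to the lm object inside the parsnip fit from (e):
confint(data1$fit, level = 0.95)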
#(h). Is there evidence of outliers or high leverage observations in the model from (e)?
par(mfrow=c(2,2))
plot(data1$fit)
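# A numeric complement to the plots (a sketch): hat values above twice the
# average leverage are a common rule-of-thumb flag for high-leverage points,
# and standardized residuals beyond +/- 3 would suggest outliers.
h <- hatvalues(data1$fit)
which(h > 2 * mean(h))
which(abs(rstandard(data1$fit)) > 3)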