plot(sales ~ TV, data = df1,
pch = 20, cex = 1.5,
main = "TV və satis xerclemeleri")
cor.test(df1$TV, df1$sales)
Pearson's product-moment correlation
data: df1$TV and df1$sales
t = 17.668, df = 198, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7218201 0.8308014
sample estimates:
cor
0.7822244
pairs(df1)
library(PerformanceAnalytics)
chart.Correlation(df1, histogram = T, pch = 20)
library(caret)
package 㤼㸱caret㤼㸲 was built under R version 4.0.4Loading required package: lattice
Attaching package: 㤼㸱caret㤼㸲
The following object is masked from 㤼㸱package:purrr㤼㸲:
lift
featurePlot(x = df1[ ,c("TV", "radio", "newspaper")],
y = df1$sales)
sr_model <- lm(sales ~ ., data = df1)
summary(sr_model)
Call:
lm(formula = sales ~ ., data = df1)
Residuals:
Min 1Q Median 3Q Max
-8.8277 -0.8908 0.2418 1.1893 2.8292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.938889 0.311908 9.422 <2e-16 ***
TV 0.045765 0.001395 32.809 <2e-16 ***
radio 0.188530 0.008611 21.893 <2e-16 ***
newspaper -0.001037 0.005871 -0.177 0.86
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.686 on 196 degrees of freedom
Multiple R-squared: 0.8972, Adjusted R-squared: 0.8956
F-statistic: 570.3 on 3 and 196 DF, p-value: < 2.2e-16
sr_model_2 <- lm(sales ~ TV + radio, data = df1)
summary(sr_model_2)
Call:
lm(formula = sales ~ TV + radio, data = df1)
Residuals:
Min 1Q Median 3Q Max
-8.7977 -0.8752 0.2422 1.1708 2.8328
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.92110 0.29449 9.919 <2e-16 ***
TV 0.04575 0.00139 32.909 <2e-16 ***
radio 0.18799 0.00804 23.382 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.681 on 197 degrees of freedom
Multiple R-squared: 0.8972, Adjusted R-squared: 0.8962
F-statistic: 859.6 on 2 and 197 DF, p-value: < 2.2e-16
#1
predict(sr_model)
1 2 3 4 5 6 7
20.523974 12.337855 12.307671 17.597830 13.188672 12.478348 11.729760
8 9 10 11 12 13 14
12.122953 3.727341 12.550849 7.032299 17.285129 10.577121 8.826300
15 16 17 18 19 20 21
18.434366 20.819300 12.823657 23.224957 9.951682 14.166073 18.100767
22 23 24 25 26 27 28
14.740538 6.489150 16.545933 8.146519 15.610039 14.989514 17.051673
29 30 31 32 33 34 35
19.410538 9.144024 21.633934 11.346093 7.638883 18.864268 7.574831
36 37 38 39 40 41 42
17.006826 23.405901 15.623478 9.908681 20.447610 16.377665 17.295983
43 44 45 46 47 48 49
21.595803 13.963857 8.887880 15.161523 8.873387 21.722630 16.263620
50 51 52 53 54 55 56
8.168166 12.631211 9.339813 20.662976 19.944700 20.374430 21.292611
57 58 59 60 61 62 63
8.527713 12.774588 21.898052 18.133487 5.742156 22.890672 16.784261
64 65 66 67 68 69 70
13.210692 16.977736 7.849045 9.016032 12.037007 18.976579 21.108912
71 72 73 74 75 76 77
17.779498 10.626938 10.366849 9.902982 17.329312 11.858322 4.477589
78 79 80 81 82 83 84
13.811902 8.813314 9.675303 11.445924 14.647941 10.178408 14.421842
85 86 87 88 89 90 91
20.781365 15.181408 11.598707 15.593785 11.711271 16.922255 9.999230
92 93 94 95 96 97 98
4.496316 19.156396 21.227574 10.482124 16.314921 12.635717 15.337078
99 100 101 102 103 104 105
24.118607 16.940350 13.875958 23.242487 17.644094 14.762211 20.301109
106 107 108 109 110 111 112
17.936415 6.126022 7.108502 3.587258 19.692931 14.759874 21.140275
113 114 115 116 117 118 119
13.880610 16.403776 15.305096 12.919689 11.978747 6.570777 15.566093
120 121 122 123 124 125 126
6.820068 14.410106 7.838076 13.626457 15.082791 19.454413 9.127350
127 128 129 130 131 132 133
10.577174 6.599669 22.255492 7.884106 10.427687 15.577798 8.449150
134 135 136 137 138 139 140
19.266923 11.836804 14.001414 11.453486 20.851252 9.768428 19.675476
141 142 143 144 145 146 147
9.489641 18.399029 19.249869 8.764803 10.091334 9.708539 15.294224
148 149 150 151 152 153 154
23.260861 12.263359 9.827271 18.367205 10.009538 16.360000 18.223901
155 156 157 158 159 160 161
15.501617 5.307559 15.384852 10.014311 10.384199 12.399148 14.213833
162 163 164 165 166 167 168
13.559146 14.946782 17.351636 11.068295 14.223721 10.824395 13.363247
169 170 171 172 173 174 175
17.186143 17.941556 7.394980 14.358274 7.607692 11.970939 13.744357
176 177 178 179 180 181 182
24.786870 19.979373 12.162046 16.010997 12.384555 10.587200 13.928099
183 184 185 186 187 188 189
6.554670 24.133100 18.538521 20.803011 9.691373 17.076442 18.644306
190 191 192 193 194 195 196
6.051624 12.489159 8.424019 4.466230 18.486958 16.495300 5.370342
197 198 199 200
8.165312 12.785921 23.767321 15.173196
#2
sample <- data.frame(TV = 120, radio = 50)
predict(sr_model_2, sample)
1
17.81139
# Texmin edilen deyer ucun etibarli interval;
predict(sr_model_2, sample, interval = "confidence", level = 0.95)
fit lwr upr
1 17.81139 17.31781 18.30496
# Qalıqlar;
head(resid(sr_model_2), 10)
1 2 3 4 5 6
1.54453537 -1.94536229 -3.03701773 0.88288404 -0.32390813 -5.31208449
7 8 9 10
0.08178759 1.09448447 1.09062080 -1.95169696
# Standartlaşdırılmış qalıqlar;
head(rstudent(sr_model_2), 10)
1 2 3 4 5 6
0.92479275 -1.16912512 -1.84469838 0.52742267 -0.19309584 -3.29506919
7 8 9 10
0.04883717 0.65187992 0.65666812 -1.17197091
# Həqiqi dəyərlər;
head(df1$sales, 10)
[1] 22.1 10.4 9.3 18.5 12.9 7.2 11.8 13.2 4.8 10.6
# Təxmin edilən dəyərlər;
head(predict(sr_model_2), 10)
1 2 3 4 5 6 7
20.555465 12.345362 12.337018 17.617116 13.223908 12.512084 11.718212
8 9 10
12.105516 3.709379 12.551697
# Qalıq (fərq);
ri <- data.frame(
y = head(df1$sales, 10),
y_i = head(predict(sr_model_2), 10))
ri$xeta <- ri$y - ri$y_i
# Ortalama xəta dəyərləri;
ri$ortalama_xeta <- ri$xeta^2
sqrt(mean(ri_ortalama_xeta)) # RMSE
[1] 2.251651
ri
NA
library(ISLR)
df <- Hitters
df
dplyr::glimpse(df) # dataya qısa baxış
Rows: 322
Columns: 20
$ AtBat <int> 293, 315, 479, 496, 321, 594, 185, 298, 323, 401, 57...
$ Hits <int> 66, 81, 130, 141, 87, 169, 37, 73, 81, 92, 159, 53, ...
$ HmRun <int> 1, 7, 18, 20, 10, 4, 1, 0, 6, 17, 21, 4, 13, 0, 7, 3...
$ Runs <int> 30, 24, 66, 65, 39, 74, 23, 24, 26, 49, 107, 31, 48,...
$ RBI <int> 29, 38, 72, 78, 42, 51, 8, 24, 32, 66, 75, 26, 61, 1...
$ Walks <int> 14, 39, 76, 37, 30, 35, 21, 7, 8, 65, 59, 27, 47, 22...
$ Years <int> 1, 14, 3, 11, 2, 11, 2, 3, 2, 13, 10, 9, 4, 6, 13, 3...
$ CAtBat <int> 293, 3449, 1624, 5628, 396, 4408, 214, 509, 341, 520...
$ CHits <int> 66, 835, 457, 1575, 101, 1133, 42, 108, 86, 1332, 13...
$ CHmRun <int> 1, 69, 63, 225, 12, 19, 1, 0, 6, 253, 90, 15, 41, 4,...
$ CRuns <int> 30, 321, 224, 828, 48, 501, 30, 41, 32, 784, 702, 19...
$ CRBI <int> 29, 414, 266, 838, 46, 336, 9, 37, 34, 890, 504, 186...
$ CWalks <int> 14, 375, 263, 354, 33, 194, 24, 12, 8, 866, 488, 161...
$ League <fct> A, N, A, N, N, A, N, A, N, A, A, N, N, A, N, A, N, A...
$ Division <fct> E, W, W, E, E, W, E, W, W, E, E, W, E, E, E, W, W, W...
$ PutOuts <int> 446, 632, 880, 200, 805, 282, 76, 121, 143, 0, 238, ...
$ Assists <int> 33, 43, 82, 11, 40, 421, 127, 283, 290, 0, 445, 45, ...
$ Errors <int> 20, 10, 14, 3, 4, 25, 7, 9, 19, 0, 22, 11, 7, 6, 8, ...
$ Salary <dbl> NA, 475.000, 480.000, 500.000, 91.500, 750.000, 70.0...
$ NewLeague <fct> A, N, A, N, N, A, A, A, N, A, A, N, N, A, N, A, N, A...
colSums(is.na(df))
AtBat Hits HmRun Runs RBI Walks Years
0 0 0 0 0 0 0
CAtBat CHits CHmRun CRuns CRBI CWalks League
0 0 0 0 0 0 0
Division PutOuts Assists Errors Salary NewLeague
0 0 0 0 59 0
# Hədəf dəyişənimizdə 59 ədəd NA mövcuddur. Say çox olduğundan onları silmək lazımdır;
df <- na.omit(df)
df
NA
rownames(df) <- c()
df
NA
library(PerformanceAnalytics)
Loading required package: xts
Loading required package: zoo
Attaching package: 㤼㸱zoo㤼㸲
The following objects are masked from 㤼㸱package:base㤼㸲:
as.Date, as.Date.numeric
Attaching package: 㤼㸱xts㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
first, last
Attaching package: 㤼㸱PerformanceAnalytics㤼㸲
The following object is masked from 㤼㸱package:graphics㤼㸲:
legend
chart.Correlation(df %>%
dplyr::select(-c("League", "NewLeague", "Division")),
histogram = T,
pch = 19)
library(caret)
set.seed(3456)
train_index <- createDataPartition(df$Salary,
p = .8,
list = F,
times = 1) # hədəf dəyişəni 80 və 20 nisbətində bölünür
# İlk 6 sətir;
head(train_index)
Resample1
[1,] 1
[2,] 2
[3,] 4
[4,] 6
[5,] 8
[6,] 9
# Train və test set;
train <- df[train_index, ]
test <- df[-train_index, ]
library(tidyverse)
# Train-də müstəqil dəyişənlər;
train_x <- train %>% dplyr::select(-Salary)
# Train-də asılı dəyişən;
train_y <- train$Salary
# Test-də müstəqil dəyişənlər;
test_x <- test %>% dplyr::select(-Salary)
# Test-də asılı dəyişən;
test_y <- test$Salary
# Train setini tək bir datada birləşdirmək;
training <- data.frame(train_x, Salary = train_y)
training
NA
glimpse(training)
Rows: 212
Columns: 20
$ AtBat <int> 315, 479, 321, 185, 323, 401, 202, 239, 196, 568,...
$ Hits <int> 81, 130, 87, 37, 81, 92, 53, 60, 43, 158, 46, 32,...
$ HmRun <int> 7, 18, 10, 1, 6, 17, 4, 0, 7, 20, 2, 8, 16, 16, 1...
$ Runs <int> 24, 66, 39, 23, 26, 49, 31, 30, 29, 89, 24, 16, 7...
$ RBI <int> 38, 72, 42, 8, 32, 66, 26, 11, 27, 75, 8, 22, 48,...
$ Walks <int> 39, 76, 30, 21, 8, 65, 27, 22, 30, 73, 15, 14, 65...
$ Years <int> 14, 3, 2, 2, 2, 13, 9, 6, 13, 15, 5, 8, 1, 6, 18,...
$ CAtBat <int> 3449, 1624, 396, 214, 341, 5206, 1876, 1941, 3231...
$ CHits <int> 835, 457, 101, 42, 86, 1332, 467, 510, 825, 2273,...
$ CHmRun <int> 69, 63, 12, 1, 6, 253, 15, 4, 36, 177, 5, 24, 16,...
$ CRuns <int> 321, 224, 48, 30, 32, 784, 192, 309, 376, 1045, 6...
$ CRBI <int> 414, 266, 46, 9, 34, 890, 186, 103, 290, 993, 23,...
$ CWalks <int> 375, 263, 33, 24, 8, 866, 161, 207, 238, 732, 39,...
$ League <fct> N, A, N, N, N, A, N, A, N, N, A, N, N, N, A, A, N...
$ Division <fct> W, W, E, E, W, E, W, E, E, W, W, W, E, W, E, E, E...
$ PutOuts <int> 632, 880, 805, 76, 143, 0, 304, 121, 80, 105, 102...
$ Assists <int> 43, 82, 40, 127, 290, 0, 45, 151, 45, 290, 177, 2...
$ Errors <int> 10, 14, 4, 7, 19, 0, 11, 6, 8, 10, 16, 2, 5, 3, 1...
$ NewLeague <fct> N, A, N, A, N, A, N, A, N, N, A, N, N, N, A, A, N...
$ Salary <dbl> 475.000, 480.000, 91.500, 70.000, 75.000, 1100.00...
summary(training)
AtBat Hits HmRun Runs
Min. : 19.0 Min. : 1.0 Min. : 0.00 Min. : 0.00
1st Qu.:279.8 1st Qu.: 70.0 1st Qu.: 5.00 1st Qu.: 32.00
Median :404.0 Median :102.5 Median : 9.00 Median : 50.00
Mean :396.1 Mean :105.2 Mean :11.24 Mean : 53.52
3rd Qu.:512.2 3rd Qu.:138.2 3rd Qu.:17.00 3rd Qu.: 72.00
Max. :687.0 Max. :223.0 Max. :40.00 Max. :130.00
RBI Walks Years CAtBat
Min. : 0.00 Min. : 0.00 Min. : 1.000 Min. : 19.0
1st Qu.: 30.00 1st Qu.: 23.75 1st Qu.: 4.000 1st Qu.: 817.8
Median : 46.00 Median : 37.00 Median : 6.000 Median : 1953.5
Mean : 50.07 Mean : 41.65 Mean : 7.311 Mean : 2621.9
3rd Qu.: 66.75 3rd Qu.: 60.00 3rd Qu.:10.000 3rd Qu.: 3745.0
Max. :117.00 Max. :105.00 Max. :24.000 Max. :14053.0
CHits CHmRun CRuns CRBI
Min. : 4.0 Min. : 0.00 Min. : 2.0 Min. : 3.00
1st Qu.: 205.5 1st Qu.: 15.00 1st Qu.: 102.0 1st Qu.: 90.25
Median : 530.0 Median : 38.00 Median : 256.0 Median : 239.50
Mean : 712.1 Mean : 66.19 Mean : 356.6 Mean : 322.08
3rd Qu.: 972.0 3rd Qu.: 90.50 3rd Qu.: 477.5 3rd Qu.: 419.25
Max. :4256.0 Max. :548.00 Max. :2165.0 Max. :1659.00
CWalks League Division PutOuts Assists
Min. : 1.0 A:105 E: 99 Min. : 0.0 Min. : 0.0
1st Qu.: 71.0 N:107 W:113 1st Qu.: 121.0 1st Qu.: 8.0
Median : 174.5 Median : 229.0 Median : 44.5
Mean : 262.8 Mean : 302.7 Mean :112.7
3rd Qu.: 326.8 3rd Qu.: 327.2 3rd Qu.:173.8
Max. :1566.0 Max. :1320.0 Max. :492.0
Errors NewLeague Salary
Min. : 0.000 A:109 Min. : 67.5
1st Qu.: 3.000 N:103 1st Qu.: 190.0
Median : 6.500 Median : 425.0
Mean : 8.094 Mean : 527.3
3rd Qu.:12.000 3rd Qu.: 750.0
Max. :26.000 Max. :2460.0
funModeling::plot_num(training)
lm_fit <- lm(Salary ~ ., data = training)
summary(lm_fit)
Call:
lm(formula = Salary ~ ., data = training)
Residuals:
Min 1Q Median 3Q Max
-851.67 -173.21 -23.77 137.28 1820.27
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 209.35762 102.57119 2.041 0.04261 *
AtBat -1.97581 0.75896 -2.603 0.00995 **
Hits 7.31701 2.78986 2.623 0.00942 **
HmRun 7.69059 7.28624 1.055 0.29253
Runs -3.87671 3.39100 -1.143 0.25436
RBI -0.68125 3.00225 -0.227 0.82073
Walks 6.88510 2.15191 3.200 0.00161 **
Years -4.78994 14.45690 -0.331 0.74076
CAtBat -0.10468 0.15944 -0.657 0.51227
CHits -0.26095 0.76603 -0.341 0.73373
CHmRun -0.81648 1.84091 -0.444 0.65789
CRuns 1.85303 0.84246 2.200 0.02903 *
CRBI 0.87847 0.76890 1.143 0.25467
CWalks -0.87146 0.39858 -2.186 0.02999 *
LeagueN 27.66900 92.12289 0.300 0.76424
DivisionW -150.20255 45.94953 -3.269 0.00128 **
PutOuts 0.22536 0.08933 2.523 0.01246 *
Assists 0.33338 0.24679 1.351 0.17833
Errors -2.58300 5.30801 -0.487 0.62708
NewLeagueN 30.88019 91.88201 0.336 0.73717
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 323.6 on 192 degrees of freedom
Multiple R-squared: 0.4947, Adjusted R-squared: 0.4447
F-statistic: 9.892 on 19 and 192 DF, p-value: < 2.2e-16
caret::defaultSummary(data.frame(obs = training$Salary,
pred = lm_fit$fitted.values))
RMSE Rsquared MAE
307.9122011 0.4946656 219.5601905
caret::defaultSummary(data.frame(obs = test_y,
pred = predict(lm_fit, test_x)))
RMSE Rsquared MAE
298.9520026 0.6698964 230.0477247
# 10 k-qatlı cross validation;
ctrl <- caret::trainControl(method = "cv",
number = 10)
lm_val_fit <- caret::train(x = train_x,
y = train_y,
method = "lm",
trControl = ctrl)
lm_val_fit
Linear Regression
212 samples
19 predictor
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 190, 190, 191, 190, 191, 192, ...
Resampling results:
RMSE Rsquared MAE
350.5338 0.4379185 252.0284
Tuning parameter 'intercept' was held constant at a value of TRUE
summary(lm_val_fit)
Call:
lm(formula = .outcome ~ ., data = dat)
Residuals:
Min 1Q Median 3Q Max
-851.67 -173.21 -23.77 137.28 1820.27
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 209.35762 102.57119 2.041 0.04261 *
AtBat -1.97581 0.75896 -2.603 0.00995 **
Hits 7.31701 2.78986 2.623 0.00942 **
HmRun 7.69059 7.28624 1.055 0.29253
Runs -3.87671 3.39100 -1.143 0.25436
RBI -0.68125 3.00225 -0.227 0.82073
Walks 6.88510 2.15191 3.200 0.00161 **
Years -4.78994 14.45690 -0.331 0.74076
CAtBat -0.10468 0.15944 -0.657 0.51227
CHits -0.26095 0.76603 -0.341 0.73373
CHmRun -0.81648 1.84091 -0.444 0.65789
CRuns 1.85303 0.84246 2.200 0.02903 *
CRBI 0.87847 0.76890 1.143 0.25467
CWalks -0.87146 0.39858 -2.186 0.02999 *
LeagueN 27.66900 92.12289 0.300 0.76424
DivisionW -150.20255 45.94953 -3.269 0.00128 **
PutOuts 0.22536 0.08933 2.523 0.01246 *
Assists 0.33338 0.24679 1.351 0.17833
Errors -2.58300 5.30801 -0.487 0.62708
NewLeagueN 30.88019 91.88201 0.336 0.73717
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 323.6 on 192 degrees of freedom
Multiple R-squared: 0.4947, Adjusted R-squared: 0.4447
F-statistic: 9.892 on 19 and 192 DF, p-value: < 2.2e-16
names(lm_val_fit)
[1] "method" "modelInfo" "modelType" "results"
[5] "pred" "bestTune" "call" "dots"
[9] "metric" "control" "finalModel" "preProcess"
[13] "trainingData" "resample" "resampledCM" "perfNames"
[17] "maximize" "yLimits" "times" "levels"
lm_val_fit$bestTune
lm_val_fit$finalModel
Call:
lm(formula = .outcome ~ ., data = dat)
Coefficients:
(Intercept) AtBat Hits HmRun Runs
209.3576 -1.9758 7.3170 7.6906 -3.8767
RBI Walks Years CAtBat CHits
-0.6812 6.8851 -4.7899 -0.1047 -0.2610
CHmRun CRuns CRBI CWalks LeagueN
-0.8165 1.8530 0.8785 -0.8715 27.6690
DivisionW PutOuts Assists Errors NewLeagueN
-150.2025 0.2254 0.3334 -2.5830 30.8802
library(pls)
package 㤼㸱pls㤼㸲 was built under R version 4.0.4
Attaching package: 㤼㸱pls㤼㸲
The following object is masked from 㤼㸱package:caret㤼㸲:
R2
The following object is masked from 㤼㸱package:stats㤼㸲:
loadings
pcr_fit <- pcr(Salary ~., data = training,
scale = T,
validation = "CV")
summary(pcr_fit)
Data: X dimension: 212 19
Y dimension: 212 1
Fit method: svdpc
Number of components considered: 19
VALIDATION: RMSEP
Cross-validated using 10 random segments.
(Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps
CV 435.2 348.8 349.4 348.6 349.0 343.7
adjCV 435.2 348.5 349.1 348.1 348.5 342.9
6 comps 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps
CV 346.0 347.5 347.5 349.9 354.3 358.8 360.5
adjCV 345.1 346.5 346.5 348.7 352.9 357.2 358.8
13 comps 14 comps 15 comps 16 comps 17 comps 18 comps
CV 368.4 364.6 367.3 357.8 360.7 358.2
adjCV 366.1 361.9 364.8 355.4 357.9 355.5
19 comps
CV 361.9
adjCV 358.9
TRAINING: % variance explained
1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
X 38.29 59.84 70.72 78.77 84.10 88.74 92.15
Salary 36.39 37.22 38.25 38.70 41.57 42.70 42.72
8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
X 94.77 96.31 97.24 97.97 98.63 99.18
Salary 42.84 42.95 43.15 43.16 43.20 43.67
14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
X 99.48 99.74 99.88 99.97 99.99 100.00
Salary 45.14 45.18 46.98 48.62 49.37 49.47
validationplot(pcr_fit, val.type = "MSEP")
defaultSummary(data.frame(obs = training$Salary,
pred = as.vector(pcr_fit$fitted.values)))
RMSE Rsquared MAE
326.3749668 0.4322479 224.6325033
predict(pcr_fit, test_x[1:10,], ncomp = 1:2)
, , 1 comps
Salary
3 873.6221
5 644.5249
7 201.7725
10 853.6526
12 442.0163
19 323.2950
28 578.9129
34 289.7256
40 781.8809
52 480.8291
, , 2 comps
Salary
3 859.8978
5 675.7339
7 190.3023
10 901.1176
12 454.3280
19 343.1422
28 646.9877
34 268.3983
40 799.3727
52 543.1138
defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(pcr_fit, test_x, nncomp = 1:2))))
RMSE Rsquared MAE
335.2121990 0.6007209 242.1222688
ctrl_pcr <- trainControl(method = "CV", number = 10)
set.seed(123)
pcr_tune <- caret::train(train_x, train_y,
method = "pcr",
trControl = ctrl_pcr,
tuneLength = 20,
preProcess = c("center", "scale"))
pcr_tune
Principal Component Analysis
212 samples
19 predictor
Pre-processing: centered (16), scaled (16), ignore (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 190, 190, 191, 192, 191, 191, ...
Resampling results across tuning parameters:
ncomp RMSE Rsquared MAE
1 336.6196 0.4383181 235.8390
2 335.6383 0.4519356 233.3395
3 339.5381 0.4358460 235.6564
4 342.4885 0.4232284 234.9631
5 343.8012 0.4181171 235.6399
6 345.4532 0.4137980 237.7154
7 347.1181 0.4066132 238.0286
8 347.8549 0.4019441 238.0686
9 343.1562 0.4275097 239.3959
10 343.3225 0.4234104 240.4108
11 345.0653 0.4205086 241.5356
12 351.2683 0.4112054 245.0942
13 349.2697 0.4132485 246.8983
14 350.5473 0.4115544 247.5792
15 351.2237 0.4072276 249.0690
16 352.7575 0.3979376 254.3840
17 346.5537 0.4266372 245.8121
18 346.5486 0.4218345 247.8696
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was ncomp = 2.
plot(pcr_tune)
defaultSummary(data.frame(
obs = test_y,
pred = predict(pcr_tune, test_x)))
RMSE Rsquared MAE
352.7802513 0.5776453 249.4600071
pls_fit <- plsr(Salary ~., data = training)
summary(pls_fit)
Data: X dimension: 212 19
Y dimension: 212 1
Fit method: kernelpls
Number of components considered: 19
TRAINING: % variance explained
1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
X 97.42 98.72 99.12 99.46 99.62 99.75 99.83
Salary 25.36 34.85 39.48 40.75 41.83 42.65 44.60
8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
X 99.97 99.99 99.99 99.99 100.00 100.00
Salary 45.06 45.24 45.97 46.16 46.23 46.52
14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
X 100.0 100.00 100.00 100.00 100.00 100.00
Salary 46.6 47.11 48.71 49.35 49.47 49.47
validationplot(pls_fit, val.type = "MSEP")
names(pls_fit)
[1] "coefficients" "scores" "loadings"
[4] "loading.weights" "Yscores" "Yloadings"
[7] "projection" "Xmeans" "Ymeans"
[10] "fitted.values" "residuals" "Xvar"
[13] "Xtotvar" "fit.time" "ncomp"
[16] "method" "call" "terms"
[19] "model"
pls_fit$scores
Comp 1 Comp 2 Comp 3 Comp 4 Comp 5
1 836.480084 145.5414661 -291.53492429 -100.7399608 -166.77867263
2 -989.772229 537.0085636 -51.77851658 63.2695071 -16.72991618
4 -2326.017667 371.3299020 -190.43972490 68.2274304 33.40214881
6 -2567.185137 -257.2012651 -37.75844788 71.5873778 72.46307251
8 -2422.694512 -117.9580243 58.37585888 -21.1871389 102.78188485
9 2769.982964 -180.4795804 223.79882069 166.7549454 -235.20784577
11 -827.346556 -132.1573764 -201.36046926 3.8598063 -12.10268261
13 -752.129819 -238.0423765 -76.84543717 -1.8563354 70.43854553
14 569.501478 -353.8887108 -204.11161797 -55.7439008 -22.23211574
15 5744.056999 -137.4064779 93.96610909 -157.3423493 42.05294537
16 -2293.070008 -239.6957741 -51.01500346 37.7485796 85.41615311
17 -2028.935965 -214.6206598 -158.55827798 99.5052522 40.76348153
18 -2329.531677 21.8049167 50.96237680 45.0932481 -54.43585313
20 -724.341157 216.3662761 -11.86129396 -11.0202479 -90.85378900
21 6150.745732 568.3806915 -292.70434466 -170.1016613 173.62744283
22 93.736511 183.6037354 56.07127817 -46.0965653 -76.54911422
23 -762.976192 -138.8336769 -98.39070969 -13.8756920 -41.90161395
24 -299.492698 357.2837164 -124.48735978 -57.3533318 -56.06835128
25 -322.378755 78.0165667 163.41256601 -113.2518458 38.34278616
26 2764.068618 48.2569112 142.23441527 62.9291710 -191.01696573
27 -2154.429755 -38.0159568 34.29602777 9.2039776 -55.12099074
29 -1419.582122 -33.0893811 -223.40488700 17.9096102 8.78491432
30 3826.355043 -212.2983591 71.40546032 87.7596200 209.18324674
31 -2404.298151 54.5458188 -126.33481813 53.8591337 25.87549794
32 -2531.705657 -71.9133701 -131.32516847 109.3939574 37.21858991
33 -2055.468061 -85.9862248 -130.42844616 72.2154739 19.71124355
35 -1788.547888 -127.2701069 139.20483203 -6.1334854 34.46210327
36 -1992.266503 -271.4870315 -32.52170471 109.6619263 24.91724919
37 36.511596 69.1942560 83.73171724 -10.5611594 -106.40455442
38 4154.667682 80.4867725 37.83832984 105.0373668 -13.46395169
39 -2545.066143 -219.9246103 -35.42798539 109.5511325 23.00507033
41 2685.551999 -20.2295728 -26.94537017 60.1975619 -23.25604346
42 -1782.033569 -94.2474390 47.12799076 7.7313802 -78.45373138
43 -1375.178943 -193.1778501 -44.72584006 76.8616564 -45.33615388
44 1094.510579 -290.7400285 -167.66848923 -194.1336427 24.69180729
45 671.714313 157.1647394 343.69439706 -48.6341581 179.66366110
46 -2328.859529 -30.1994028 87.58734917 28.7383799 -12.01295557
47 4111.679391 -477.4134036 -359.91246243 -228.4641135 -261.99911370
48 -1636.429971 -227.9641667 -81.13596309 -38.9588667 65.20062432
49 -1825.396983 -259.0024461 -41.96423330 17.8422991 34.02557731
50 5213.979138 -152.7173240 249.21739911 56.4231404 -153.42537666
51 -2252.439444 -212.4530161 -91.40817294 81.5064996 9.84467876
53 5846.031131 -331.2977977 -282.47289318 -262.2114006 -46.03119964
54 2876.711564 -77.9395964 164.26162343 -51.1661996 -82.85279563
55 5520.253372 491.6928024 45.95044400 98.1664081 -332.37532260
56 4337.398178 86.3635225 214.33941533 150.7513406 -157.39867175
57 1042.530040 -110.8625077 -64.85969984 -165.8186113 166.89575587
58 -1445.455499 -78.0992885 -7.66020810 50.3351618 -0.63780300
59 -492.911059 -106.7896110 8.07392714 6.1657917 -74.61115295
61 3872.292051 -328.8405687 -73.69376668 35.5788596 -41.58937726
63 2605.192513 164.4563989 275.60165819 142.1411544 -92.11046804
64 1284.948482 -44.8096304 -3.07033019 85.0464548 -169.53214477
Comp 6 Comp 7 Comp 8 Comp 9 Comp 10
1 -22.9390609 -3.1480964 12.9994325 42.7290651 7.7467031
2 0.5393059 34.8111993 -1.4324248 39.0847484 -0.9833622
4 0.9079340 2.0258485 -7.9934978 -6.2535303 6.1200551
6 -18.0001309 9.2891397 -37.1760738 -9.9576334 -2.6854386
8 -49.7324970 -43.9421115 -66.4117438 -15.8617005 -1.6581482
9 -59.2945736 -8.3815738 -93.1124349 7.7900923 2.3257692
11 7.7683137 44.0980610 33.9855402 13.7299705 -4.2134515
13 10.6462902 22.6321383 -113.2915943 -33.1677611 6.3210500
14 28.9650667 61.5270190 59.4628982 -8.7366930 -6.3549182
15 7.8623816 33.9178520 107.6937696 42.2382484 -11.2292398
16 -19.8636960 22.8148998 -40.2049907 -20.9589829 -0.3433320
17 -2.1003635 33.8805528 29.5005225 3.4551582 -0.8930491
18 23.7909259 12.5740368 5.8933577 1.1448774 -15.3860234
20 10.0818119 18.9512067 12.7812088 18.7276016 2.8210580
21 21.6861972 -76.1045466 328.1029945 2.2987366 -12.2927963
22 57.1582261 -33.6813809 -116.0658935 -2.1074304 10.0703978
23 16.5810641 -64.7251565 -117.3543701 -41.6449050 7.7335381
24 -13.8269289 -43.2311332 59.4747673 7.0684540 10.3311684
25 9.7360311 53.3916800 -103.6868002 15.6996061 2.2309548
26 13.2163062 62.7041779 -48.2702928 42.9102637 -12.6359603
27 -0.6481610 -100.2711014 -52.7384937 -10.9544841 1.2953373
29 -4.4909943 10.7170951 32.1035344 -19.8154567 0.7955704
30 -18.7537654 -89.7332949 1.3512399 50.1716138 -14.3575345
31 -18.4661640 -47.5878325 -47.6017097 -16.2749169 0.5203372
32 -2.9592242 27.8196434 -4.7566967 13.0642744 5.5608366
33 -9.7015221 -23.8675380 -9.4719855 -34.6815250 -4.9560645
35 -7.8316348 -11.4345184 8.7212169 30.0167469 7.8124607
36 3.3509193 4.1183599 24.6462330 -18.7436139 -2.0677390
37 33.9825829 21.8456367 59.0381213 39.2232626 11.0329414
38 -34.1648404 -81.5177208 35.1596279 -126.6398908 12.8100085
39 2.1118817 -0.3349763 -0.5844183 8.4799955 -2.4618993
41 1.9301515 -46.7341437 -12.0721202 -30.0983452 6.5318629
42 0.6770427 -51.5526519 28.8272693 0.9705514 -14.6363957
43 -9.7440657 -2.9802704 -25.5535447 16.3130428 2.8585702
44 10.8041565 15.4194507 79.0794064 -30.1109335 -1.5601871
45 -19.3065293 82.6404007 41.4756577 -32.8328613 0.9947140
46 -5.9790344 -56.7215649 -4.3305272 -9.3556239 -12.5665204
47 -18.7394248 46.1628767 -75.9500449 59.2799719 -8.2094294
48 -19.0317464 -1.2899155 -35.4993398 -19.3899477 -2.8015743
49 -13.2023078 27.5674217 -48.7188000 -8.5580536 -1.6762951
50 3.3854101 19.0553972 190.1629248 -113.1904871 0.3057777
51 7.6479593 12.0654511 7.3310106 1.8155405 1.3173498
53 -5.9331175 24.4267890 84.0362771 30.5455338 -10.5386418
54 -33.1295938 37.9308204 117.3586390 -26.5199477 -7.8008622
55 -71.5837847 76.0221601 -282.4330250 -17.1382399 -1.5899783
56 12.7744262 84.3722725 -89.7779697 -31.9120265 2.3778415
57 19.5887581 -11.8913570 65.0380827 -39.4336023 -0.7979649
58 24.0318360 -15.0467523 -21.8628449 0.8745118 5.9464599
59 13.8077204 -6.6300186 47.2486763 -14.9691452 2.0388804
61 49.3598574 109.8191216 -190.0110182 -78.1183730 7.6854207
63 -6.8268854 -31.4408837 27.9319737 -43.3349830 10.7329767
64 -14.7925320 37.6211772 -118.3813445 -5.0577464 7.1439705
Comp 11 Comp 12 Comp 13 Comp 14 Comp 15
1 2.120891861 -1.85480580 -2.51013153 -0.67740830 2.33743049
2 2.814594556 1.40452332 -1.72285124 -2.61882587 4.16483532
4 5.055019673 0.58129154 3.81735786 2.36636209 0.63393589
6 -6.945360076 4.83751140 1.99926259 -0.39369689 1.65627887
8 4.394284790 -5.52683455 -0.91053859 3.23174355 4.98869596
9 8.958933086 3.37420676 -1.73588147 1.50744889 -1.64904007
11 -6.095946551 -2.21576099 -5.20661985 3.13347888 0.73428875
13 0.989161007 2.50687519 0.86070774 3.25807812 -0.22582330
14 -1.764014976 1.90299051 -1.16813014 -0.79387889 3.08729203
15 -14.148642633 1.98032494 8.11447938 -4.79219626 0.93075865
16 -9.150847272 -3.11559023 -2.83681740 2.02611951 3.01072789
17 0.649433059 -4.93806047 3.40550038 -3.90326301 -3.63825779
18 -10.030984220 12.52324206 4.11152636 -3.06594394 3.38282411
20 4.938041028 8.34999213 3.25427204 -9.91896562 -0.12122778
21 -2.316954347 2.74190976 -4.13579414 -7.96111360 -2.29606019
22 -5.909642277 6.44738116 -6.10119784 1.18586889 -3.90304263
23 4.739824967 7.10388046 2.21379811 1.62817176 3.00873455
24 1.771814565 1.36148528 -4.79931731 2.37102962 1.95331588
25 -24.038453137 0.62203395 0.58266760 -5.95216710 2.18207335
26 4.405538433 -2.24377767 -8.78630843 -0.65382235 -9.34244680
27 -5.935307002 -0.04104405 -1.44622170 1.28841460 -3.23999142
29 -1.562202583 -3.44955521 -0.48371582 -1.28867566 -4.07995421
30 -2.117991965 -15.56698695 -7.51824052 -10.37111234 7.41647005
31 2.617174712 2.27984576 0.95446280 0.29020549 -0.21723513
32 5.603812252 5.18476184 -1.94262035 -2.99213608 1.48479714
33 -9.706760858 -2.09202301 4.16211223 -2.37025562 -4.19912214
35 -4.080203244 -12.49920353 -5.31430780 3.45121885 5.19370122
36 -0.863004039 -6.00972493 -1.04886049 4.35926870 -2.49355294
37 4.328671580 5.37676774 -4.65695786 -6.31170033 5.63962327
38 9.360374428 0.69378858 -7.02903515 -4.70668110 -4.90118171
39 1.133643747 0.81294588 -3.13016204 0.82720434 -1.59524982
41 3.881817865 -2.46517320 -1.35968796 -6.32238396 1.42780448
42 21.553053197 -7.74165479 -5.13943529 -0.28559915 -8.40905959
43 -0.342802786 3.28114365 5.93829275 -2.94031898 2.04800967
44 5.498202041 -3.25119185 -0.66479862 -2.98067832 -4.21672185
45 -3.469510238 1.23860185 11.31510453 -1.14935366 0.29065048
46 8.295060488 -8.24902689 5.97142178 7.23719908 3.52141392
47 -8.137795490 -18.64594645 6.05317831 4.49076034 0.08643190
48 -7.585012703 -4.44742554 -3.37972284 -0.37824485 -0.79815890
49 -7.681535253 -2.06834734 -2.55400370 2.34355166 0.93079817
50 3.364451706 18.26247705 1.27178646 11.32297371 0.23292516
51 -3.106216678 -2.13859037 -0.79507251 -0.05743564 -1.11062826
53 -13.304767738 6.27671358 1.31596038 12.10377637 0.22466953
54 6.182853182 -8.80182234 -2.41076736 -4.54283829 -2.11361725
55 1.312687335 -13.89917189 4.49708169 -1.29323529 -3.88524464
56 8.375782785 -9.89109581 -4.93301450 -10.80313569 0.42439610
57 -4.685008352 -7.21415555 2.02286397 -4.19354991 -5.25063968
58 -8.423251632 2.64456599 -1.38615518 -0.31365357 -2.90802126
59 -6.789018791 -2.39107037 2.65691724 -3.31583109 -0.64630346
61 4.604130841 -13.80800020 -6.36819808 2.21071793 3.61615928
63 -4.164497104 -1.17127603 4.91594695 -6.97922251 5.47199499
64 -2.363769023 -0.89171437 -2.39980992 2.06244353 -2.69183062
Comp 16 Comp 17 Comp 18 Comp 19
1 -2.246049578 1.9036963599 -0.690015489 3.667030e-02
2 -2.078277398 -2.0491214097 0.600800499 2.877698e-02
4 2.335838411 0.0820015756 -0.131529055 3.283069e-02
6 1.594000229 0.0152557223 0.557641488 -6.681750e-01
8 -0.965091484 -0.5178271993 -0.726867876 4.828924e-02
9 1.077216450 0.3437369433 0.692865092 1.083278e-02
11 -1.155681804 0.9280673681 -0.805199856 7.136376e-02
13 0.267337495 0.0006340141 0.897737210 4.433659e-02
14 0.511238424 3.0888338313 -0.233674111 5.974342e-02
15 0.735586916 -1.8803089596 -0.584986790 -8.011405e-02
16 -3.294370151 0.2999875277 0.297502176 7.855696e-02
17 -1.272327982 2.0475495219 -1.042054261 5.022184e-02
18 2.624974194 0.1327843990 -0.026649658 -1.406803e-02
20 -0.265029205 -0.2533804877 -0.746091244 -1.779592e-02
21 1.565946134 0.4702883476 1.203970427 -6.760266e-02
22 1.354818185 -0.3457557198 0.761824539 -3.248423e-02
23 1.232010857 1.4957940935 -0.256044819 6.688481e-03
24 -2.130617668 1.8219316044 -0.910650882 2.938346e-02
25 -0.600044979 -0.5070954958 -0.829095955 -3.151889e-02
26 -0.939655308 -0.0217373056 -0.126294881 6.637382e-04
27 0.625630093 -0.4400182837 -0.956509258 -2.251215e-02
29 -1.739615134 -0.2108971157 0.238388985 4.726461e-02
30 -1.103875309 0.7791370006 -0.584134010 -1.626882e-02
31 0.335341764 -1.1599171990 -0.692089838 2.217093e-02
32 1.005126228 -0.8337585277 1.271287599 6.515750e-02
33 1.542636771 0.3863807149 0.837893484 6.830874e-03
35 -0.579597223 -0.8801572745 -0.787452306 4.968683e-02
36 0.801459343 0.7841559883 0.717228918 6.067922e-02
37 -0.193675633 -1.4136238933 -0.595024204 1.490113e-02
38 -1.331560430 0.6302445355 -0.389649615 7.477013e-03
39 3.465475859 -0.1284745744 -0.111442481 4.387189e-02
41 0.981529752 0.1933458149 0.854191415 -7.749174e-03
42 0.829589916 0.7010007891 -1.214591302 1.605461e-02
43 0.125665451 -1.3694193861 -0.596451314 1.305186e-02
44 0.093682318 0.3423597845 -0.944080924 1.893737e-02
45 -0.261358807 0.4865889856 0.570704055 -1.488536e-02
46 0.490125089 -0.3707743248 0.888765141 1.940974e-02
47 3.845428558 -1.2502092920 0.173512854 4.583196e-04
48 -1.187320503 -1.6149431426 0.429112307 4.299802e-02
49 0.008626597 -0.8464996991 -0.769275653 5.507461e-02
50 0.764063438 0.4847743010 0.294246796 1.927723e-02
51 -1.174522652 -1.7256385198 0.413306016 5.124802e-02
53 1.087346414 -2.9251619612 -0.291004380 -6.634196e-03
54 -1.521021118 -0.0016626577 -0.079218595 1.842638e-02
55 1.027392589 0.6031731968 0.316537783 -2.254392e-02
56 1.191831346 0.5008508653 0.373343594 1.166198e-02
57 2.448886852 0.5273479547 0.182243588 6.821890e-01
58 -0.270217276 -1.2189614664 -0.298813088 -7.018533e-01
59 -1.133468071 -1.4312144434 0.240429248 1.107792e-03
61 2.253838152 0.3620596306 -0.695035203 7.920000e-02
63 -0.781127372 0.2538990462 -1.252671890 -3.409345e-02
64 -0.668247532 -2.2824173012 0.195014213 2.842597e-02
[ reached getOption("max.print") -- omitted 160 rows ]
attr(,"class")
[1] "scores"
predict(pls_fit, test_x[1:10,], ncomp = 1:2)
, , 1 comps
Salary
3 816.5882
5 690.9917
7 322.1918
10 721.6201
12 421.0743
19 317.5990
28 433.9740
34 370.5644
40 706.0230
52 353.7624
, , 2 comps
Salary
3 788.1814
5 685.7672
7 238.0595
10 752.5812
12 399.3663
19 362.0479
28 429.9433
34 356.6869
40 655.4146
52 334.3595
defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(pls_fit, test_x))))
RMSE Rsquared MAE
304.9243070 0.6773935 225.3039069
ctrl_pls <- trainControl(method = "CV", number = 10)
pls_tune <- caret::train(train_x, train_y,
method = "pls",
trControl = ctrl_pls,
tuneLength = 20,
preProcess = c("center", "scale"))
pls_tune
Partial Least Squares
212 samples
19 predictor
Pre-processing: centered (16), scaled (16), ignore (3)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 191, 191, 190, 191, 192, 191, ...
Resampling results across tuning parameters:
ncomp RMSE Rsquared MAE
1 328.1563 0.4592184 231.7066
2 329.6427 0.4484560 231.9090
3 332.7712 0.4252949 235.8339
4 335.3552 0.4164914 240.3414
5 336.4160 0.4106035 245.6530
6 334.3583 0.4127177 244.5619
7 335.3392 0.4081180 247.1923
8 338.8515 0.3955681 248.6582
9 332.8612 0.4106789 245.2990
10 333.1715 0.4097490 246.0435
11 334.0356 0.4114774 245.4046
12 334.8708 0.4073478 246.3377
13 334.8165 0.4061263 246.4755
14 334.3953 0.4079212 245.3408
15 336.4871 0.4019140 247.7324
16 336.3987 0.4022513 247.5310
17 335.6587 0.4031485 247.0786
18 336.4148 0.4026413 247.8055
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was ncomp = 1.
plot(pls_tune)
pls_tune$results
NA
library(tidyverse)
train_x_num <- train_x %>%
dplyr::select(-c("League", "NewLeague", "Division"))
library(glmnet)
ridge_fit <- glmnet(x = as.matrix(train_x_num),
y = train_y,
alpha = 0)
ridge_fit
Call: glmnet(x = as.matrix(train_x_num), y = train_y, alpha = 0)
Df %Dev Lambda
1 16 0.00 232000
2 16 1.08 211400
3 16 1.19 192600
4 16 1.30 175500
5 16 1.42 159900
6 16 1.56 145700
7 16 1.70 132800
8 16 1.86 121000
9 16 2.04 110200
10 16 2.23 100400
11 16 2.43 91500
12 16 2.66 83370
13 16 2.90 75970
14 16 3.17 69220
15 16 3.46 63070
16 16 3.77 57470
17 16 4.10 52360
18 16 4.47 47710
19 16 4.86 43470
20 16 5.28 39610
21 16 5.74 36090
22 16 6.23 32880
23 16 6.75 29960
24 16 7.31 27300
25 16 7.91 24880
26 16 8.54 22670
27 16 9.22 20650
28 16 9.93 18820
29 16 10.68 17150
30 16 11.48 15620
31 16 12.31 14230
32 16 13.17 12970
33 16 14.08 11820
34 16 15.01 10770
35 16 15.97 9811
36 16 16.97 8940
37 16 17.98 8146
38 16 19.01 7422
39 16 20.05 6763
40 16 21.10 6162
41 16 22.15 5614
42 16 23.19 5116
43 16 24.23 4661
44 16 25.24 4247
45 16 26.24 3870
46 16 27.21 3526
47 16 28.14 3213
48 16 29.04 2927
49 16 29.90 2667
50 16 30.71 2430
51 16 31.49 2214
52 16 32.21 2018
53 16 32.89 1838
54 16 33.52 1675
55 16 34.10 1526
56 16 34.64 1391
57 16 35.14 1267
58 16 35.60 1155
59 16 36.01 1052
60 16 36.39 959
61 16 36.74 873
62 16 37.06 796
63 16 37.35 725
64 16 37.62 661
65 16 37.87 602
66 16 38.09 548
67 16 38.30 500
68 16 38.50 455
69 16 38.68 415
70 16 38.85 378
71 16 39.01 344
72 16 39.16 314
73 16 39.31 286
74 16 39.45 261
75 16 39.59 237
76 16 39.72 216
77 16 39.85 197
78 16 39.98 180
79 16 40.11 164
80 16 40.24 149
81 16 40.37 136
82 16 40.50 124
83 16 40.63 113
84 16 40.77 103
85 16 40.90 94
86 16 41.04 85
87 16 41.18 78
88 16 41.32 71
89 16 41.47 65
90 16 41.62 59
91 16 41.76 54
92 16 41.92 49
93 16 42.07 44
94 16 42.22 41
95 16 42.38 37
96 16 42.54 34
97 16 42.69 31
98 16 42.85 28
99 16 43.01 25
100 16 43.16 23
summary(ridge_fit)
Length Class Mode
a0 100 -none- numeric
beta 1600 dgCMatrix S4
df 100 -none- numeric
dim 2 -none- numeric
lambda 100 -none- numeric
dev.ratio 100 -none- numeric
nulldev 1 -none- numeric
npasses 1 -none- numeric
jerr 1 -none- numeric
offset 1 -none- logical
call 4 -none- call
nobs 1 -none- numeric
names(ridge_fit)
[1] "a0" "beta" "df" "dim" "lambda"
[6] "dev.ratio" "nulldev" "npasses" "jerr" "offset"
[11] "call" "nobs"
ridge_fit$beta
16 x 100 sparse Matrix of class "dgCMatrix"
[[ suppressing 29 column names 㤼㸱s0㤼㸲, 㤼㸱s1㤼㸲, 㤼㸱s2㤼㸲 ... ]]
AtBat 1.114750e-36 0.0022284306 0.0024421969 0.0026761228
Hits 3.905185e-36 0.0078170739 0.0085680834 0.0093901268
HmRun 1.697246e-35 0.0339036599 0.0371533828 0.0407089615
Runs 6.564780e-36 0.0131343339 0.0143955308 0.0157758482
RBI 7.535572e-36 0.0150714811 0.0165181511 0.0181013449
Walks 8.665332e-36 0.0173591455 0.0190284182 0.0208558199
Years 3.478507e-35 0.0693893445 0.0760301960 0.0832940476
CAtBat 9.602500e-38 0.0001918348 0.0002102252 0.0002303465
CHits 3.494530e-37 0.0006984655 0.0007654624 0.0008387716
CHmRun 2.772735e-36 0.0055414193 0.0060728997 0.0066544370
CRuns 7.096154e-37 0.0014186429 0.0015547548 0.0017036951
CRBI 7.406104e-37 0.0014802216 0.0016222010 0.0017775531
CWalks 7.991921e-37 0.0015968970 0.0017500239 0.0019175645
PutOuts 3.726573e-37 0.0007493082 0.0008216589 0.0009009216
Assists 2.428554e-37 0.0004885661 0.0005357667 0.0005874818
Errors 3.263171e-36 0.0065375772 0.0071662747 0.0078545101
AtBat 0.0029320155 0.0032118495 0.0035177592 0.0038520482
Hits 0.0102896403 0.0112736447 0.0123497413 0.0135261488
HmRun 0.0445977890 0.0488496927 0.0534968877 0.0585741116
Runs 0.0172860803 0.0189379667 0.0207442107 0.0227185384
RBI 0.0198334168 0.0217277937 0.0237989991 0.0260627198
Walks 0.0228558007 0.0250440762 0.0274376778 0.0300550393
Years 0.0912362365 0.0999169757 0.1094011960 0.1197587878
CAtBat 0.0002523543 0.0002764176 0.0003027191 0.0003314556
CHits 0.0009189630 0.0010066553 0.0011025168 0.0012072692
CHmRun 0.0072905552 0.0079861561 0.0087465373 0.0095774161
CRuns 0.0018666264 0.0020448074 0.0022395994 0.0024524720
CRBI 0.0019474883 0.0021333167 0.0023364545 0.0025584300
CWalks 0.0021008215 0.0023012048 0.0025202376 0.0027595635
PutOuts 0.0009877417 0.0010828223 0.0011869280 0.0013008903
Assists 0.0006441340 0.0007061838 0.0007741323 0.0008485245
Errors 0.0086077364 0.0094318675 0.0103333105 0.0113189977
AtBat 0.0042171985 0.0046158790 0.005050954 0.005525489 0.0060427594
Hits 0.0148117400 0.0162160778 0.017749450 0.019422904 0.0212482706
HmRun 0.0641187539 0.0701709768 0.076773825 0.083973316 0.0918185101
Runs 0.0248757584 0.0272318190 0.029803863 0.032610278 0.0356707410
RBI 0.0285358717 0.0312366633 0.034184656 0.037400820 0.0409075763
Walks 0.0329160851 0.0360423186 0.039456908 0.043184771 0.0472526509
Years 0.1310648281 0.1433997838 0.156849682 0.171506237 0.1874669207
CAtBat 0.0003628391 0.0003970975 0.000434475 0.000475233 0.0005196501
CHits 0.0013216897 0.0014466146 0.001582941 0.001731630 0.0018937077
CHmRun 0.0104849517 0.0114757668 0.012556968 0.013736164 0.0150214776
CRuns 0.0026850090 0.0029389146 0.003216019 0.003518281 0.0038477967
CRBI 0.0028008899 0.0030656051 0.003354476 0.003669538 0.0040129631
CWalks 0.0030209523 0.0033063069 0.003617668 0.003957220 0.0043272931
PutOuts 0.0014256123 0.0015620738 0.001711337 0.001874552 0.0020529615
Assists 0.0009299530 0.0010190608 0.001116546 0.001223163 0.0013397313
Errors 0.0123964185 0.0135736506 0.014859391 0.016262985 0.0177944499
AtBat 0.0066062497 0.0072196592 0.0078876392 0.008613053 0.0094007967
Hits 0.0232381956 0.0254061535 0.0277682104 0.030336553 0.0331285460
HmRun 0.1003615482 0.1096576510 0.1197728716 0.130755150 0.1426745553
Runs 0.0390062520 0.0426391599 0.0465948993 0.050895584 0.0555689484
RBI 0.0447288399 0.0488900366 0.0534198874 0.058343813 0.0636931202
Walks 0.0516891877 0.0565249778 0.0617940991 0.067528652 0.0737664622
Years 0.2048349552 0.2237192227 0.2442516962 0.266521856 0.2906676474
CAtBat 0.0005680228 0.0006206654 0.0006779347 0.000740137 0.0008076573
CHits 0.0020702671 0.0022624686 0.0024715860 0.002698834 0.0029456078
CHmRun 0.0164215597 0.0179455913 0.0196034478 0.021405071 0.0233613206
CRuns 0.0042067981 0.0045976571 0.0050228917 0.005485139 0.0059871865
CRBI 0.0043870667 0.0047943056 0.0052372698 0.005718713 0.0062415021
CWalks 0.0047303687 0.0051690780 0.0056461953 0.006164659 0.0067275299
PutOuts 0.0022479088 0.0024608412 0.0026933138 0.002947005 0.0032237083
Assists 0.0014671338 0.0016063241 0.0017583259 0.001924250 0.0021052794
Errors 0.0194644990 0.0212845559 0.0232666879 0.025423875 0.0277696291
AtBat 0.0102553893 0.0111815163 0.012184005 0.013267790 0.014437870
Hits 0.0361610237 0.0394515945 0.043018577 0.046880919 0.051058082
HmRun 0.1555977086 0.1695934218 0.184732241 0.201085866 0.218726430
Runs 0.0606426694 0.0661456065 0.072107682 0.078559715 0.085533213
RBI 0.0694991425 0.0757945008 0.082612953 0.089989194 0.097958604
Walks 0.0805462304 0.0879086192 0.095896161 0.104553121 0.113925312
Years 0.3168175013 0.3451029898 0.375657707 0.408615871 0.444110631
CAtBat 0.0008808764 0.0009601885 0.001045999 0.001138720 0.001238771
CHits 0.0032133246 0.0035034575 0.003817526 0.004157088 0.004523723
CHmRun 0.0254833923 0.0277829029 0.030271828 0.032962422 0.035867119
CRuns 0.0065319439 0.0071224354 0.007761787 0.008453206 0.009199959
CRBI 0.0068086359 0.0074232280 0.008088490 0.008807709 0.009584224
CWalks 0.0073380061 0.0079993996 0.008715120 0.009488650 0.010323514
PutOuts 0.0035253441 0.0038539631 0.004211750 0.004601023 0.005024241
Assists 0.0023026845 0.0025178227 0.002752141 0.003007179 0.003284569
Errors 0.0303181904 0.0330844327 0.036083795 0.039332190 0.042845882
AtBat 0.015699255 0.017056901 0.018515637 0.020080075 0.021754509
Hits 0.055569910 0.060436453 0.065677764 0.071313659 0.077363436
HmRun 0.237725620 0.258153633 0.280077948 0.303561907 0.328663107
Runs 0.093060110 0.101172444 0.109901973 0.119279734 0.129335522
RBI 0.106556934 0.115819925 0.125782859 0.136480029 0.147944141
Walks 0.124059847 0.135004841 0.146809036 0.159521362 0.173190423
Years 0.482272027 0.523224610 0.567084677 0.613957126 0.663931926
CAtBat 0.001346567 0.001462521 0.001587032 0.001720481 0.001863221
CHits 0.004919025 0.005344575 0.005801928 0.006292585 0.006817966
CHmRun 0.038998408 0.042368689 0.045990101 0.049874324 0.054032356
CRuns 0.010005342 0.010872644 0.011805109 0.012805884 0.013877968
CRBI 0.010421393 0.011322552 0.012290974 0.013329813 0.014442049
CWalks 0.011223245 0.012191337 0.013231194 0.014346075 0.015539024
PutOuts 0.005483996 0.005983020 0.006524173 0.007110447 0.007744950
Assists 0.003586034 0.003913392 0.004268549 0.004653498 0.005070313
Errors 0.046641330 0.050734993 0.055143082 0.059881274 0.064964357
AtBat 0.023542803 ......
Hits 0.083845565 ......
HmRun 0.355431597 ......
Runs 0.140097311 ......
RBI 0.160205636 ......
Walks 0.187863919 ......
Years 0.717080226 ......
CAtBat 0.002015569 ......
CHits 0.007379379 ......
CHmRun 0.058474266 ......
CRuns 0.015024154 ......
CRBI 0.015630421 ......
CWalks 0.016812796 ......
PutOuts 0.008430905 ......
Assists 0.005521146 ......
Errors 0.070405817 ......
.....suppressing 71 columns in show(); maybe adjust 'options(max.print= *, width = *)'
..............................
plot(ridge_fit, xvar = "lambda", label = T)
min(log(ridge_fit$lambda))
[1] 3.144099
ridge_cv_fit <- cv.glmnet(x = as.matrix(train_x_num),
y = train_y,
alpha = 0)
ridge_cv_fit
Call: cv.glmnet(x = as.matrix(train_x_num), y = train_y, alpha = 0)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 549 66 125384 22427 16
1se 5116 42 147218 22135 16
plot(ridge_cv_fit)
ridge_cv_fit$lambda.1se
[1] 5115.631
# Minimum lambdaya və 1se lambdasına qarşılıq gələn əmsallar;
coef(ridge_cv_fit, "lambda.min")
17 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 5.742569e+01
AtBat 8.673013e-02
Hits 5.524113e-01
HmRun 1.319139e+00
Runs 7.797535e-01
RBI 8.956840e-01
Walks 1.465627e+00
Years 1.994042e+00
CAtBat 1.002344e-02
CHits 4.359216e-02
CHmRun 3.284987e-01
CRuns 9.404760e-02
CRBI 9.041166e-02
CWalks 8.364211e-02
PutOuts 9.661703e-02
Assists 6.625936e-02
Errors -5.491185e-05
coef(ridge_cv_fit, "lambda.1se")
17 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 3.199871e+02
AtBat 5.721725e-02
Hits 2.118994e-01
HmRun 8.506853e-01
Runs 3.491911e-01
RBI 3.968356e-01
Walks 4.856579e-01
Years 1.667691e+00
CAtBat 4.885231e-03
CHits 1.813815e-02
CHmRun 1.432053e-01
CRuns 3.714255e-02
CRBI 3.835911e-02
CWalks 4.093089e-02
PutOuts 2.360818e-02
Assists 1.552352e-02
Errors 1.736689e-01
library(broom)
tidy(ridge_cv_fit)
NA
test_x_num <- test_x %>%
dplyr::select(-c("League", "NewLeague", "Division"))
caret::defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(ridge_cv_fit, as.matrix(test_x_num)))))
RMSE Rsquared MAE
439.4052517 0.5892548 323.1292676
caret::defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(ridge_cv_fit, as.matrix(test_x_num),
s = "lambda.min"))))
RMSE Rsquared MAE
359.4355060 0.6165164 257.4837689
ctr_ridge <- caret::trainControl(method = "cv", number = 10)
# Müəyyən lambda dəyərləri arasında parametrlərin axtarılması;
ridge_grid <- data.frame(
lambda = seq(0, 1, length = 15))
set.seed(123)
ridge_tune <- caret::train(train_x_num, train_y,
method = "ridge",
trControl = ctr_ridge,
tuneGrid = ridge_grid,
preProc = c("center", "scale"))
ridge_tune
Ridge Regression
212 samples
16 predictor
Pre-processing: centered (16), scaled (16)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 190, 190, 191, 192, 191, 191, ...
Resampling results across tuning parameters:
lambda RMSE Rsquared MAE
0.00000000 358.4258 0.3883199 250.6471
0.07142857 348.3914 0.4156422 240.1075
0.14285714 349.0004 0.4185761 239.2787
0.21428571 350.4273 0.4209666 239.7485
0.28571429 352.5350 0.4229924 241.1037
0.35714286 355.2414 0.4247269 243.4222
0.42857143 358.4824 0.4262254 246.2573
0.50000000 362.2030 0.4275313 249.8824
0.57142857 366.3544 0.4286784 253.7070
0.64285714 370.8920 0.4296935 257.8201
0.71428571 375.7753 0.4305978 262.1007
0.78571429 380.9671 0.4314082 266.6363
0.85714286 386.4330 0.4321385 271.2676
0.92857143 392.1418 0.4327997 276.2549
1.00000000 398.0645 0.4334011 281.4149
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was lambda = 0.07142857.
plot(ridge_tune)
# Ən yaxşı modelə sahib lamda dəyərinin seçilməsi;
ridge_tune$bestTune %>% dplyr::filter(
lambda == as.numeric(ridge_tune$bestTune))
# Modelin test xətası;
defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(ridge_tune, as.matrix(test_x_num)))))
RMSE Rsquared MAE
306.6493757 0.6858157 220.8024090
library(tidyverse)
train_x_num_lasso <- train_x %>%
dplyr::select(-c("League", "NewLeague", "Division"))
library(glmnet)
lasso_fit <- glmnet(x = as.vector(train_x_num_lasso),
y = train_y,
alpha = 1)
lasso_fit
Call: glmnet(x = as.vector(train_x_num_lasso), y = train_y, alpha = 1)
Df %Dev Lambda
1 0 0.00 232.000
2 1 4.87 211.400
3 1 8.91 192.600
4 2 12.32 175.500
5 3 16.30 159.900
6 4 20.01 145.700
7 4 23.21 132.800
8 4 25.87 121.000
9 4 28.08 110.200
10 5 29.94 100.400
11 5 31.54 91.500
12 5 32.87 83.370
13 5 33.98 75.970
14 6 34.89 69.220
15 6 35.88 63.070
16 6 36.70 57.470
17 6 37.38 52.360
18 6 37.94 47.710
19 6 38.41 43.470
20 6 38.80 39.610
21 6 39.12 36.090
22 6 39.39 32.880
23 6 39.61 29.960
24 7 39.80 27.300
25 7 39.95 24.880
26 7 40.08 22.670
27 7 40.19 20.650
28 7 40.28 18.820
29 7 40.36 17.150
30 7 40.42 15.620
31 7 40.47 14.230
32 8 40.53 12.970
33 8 40.58 11.820
34 8 40.62 10.770
35 8 40.66 9.811
36 8 40.69 8.940
37 8 40.72 8.146
38 10 40.98 7.422
39 10 41.50 6.763
40 10 41.93 6.162
41 11 42.29 5.614
42 12 42.80 5.116
43 12 43.27 4.661
44 12 43.64 4.247
45 12 43.96 3.870
46 12 44.22 3.526
47 12 44.44 3.213
48 12 44.62 2.927
49 12 44.77 2.667
50 12 44.89 2.430
51 13 45.02 2.214
52 13 45.12 2.018
53 14 45.25 1.838
54 14 45.39 1.675
55 15 45.51 1.526
56 15 45.63 1.391
57 15 45.72 1.267
58 15 45.80 1.155
59 15 45.86 1.052
60 15 45.91 0.959
61 15 45.96 0.873
62 15 45.99 0.796
63 15 46.02 0.725
64 15 46.05 0.661
65 15 46.07 0.602
66 15 46.09 0.548
67 15 46.10 0.500
68 15 46.12 0.455
69 15 46.12 0.415
70 15 46.13 0.378
71 15 46.13 0.344
72 14 46.14 0.314
73 14 46.15 0.286
74 14 46.15 0.261
75 14 46.15 0.237
76 14 46.16 0.216
77 14 46.16 0.197
78 14 46.16 0.180
79 15 46.16 0.164
80 16 46.17 0.149
81 16 46.17 0.136
82 16 46.17 0.124
83 16 46.18 0.113
84 16 46.18 0.103
85 16 46.19 0.094
86 16 46.19 0.085
names(lasso_fit)
[1] "a0" "beta" "df" "dim"
[5] "lambda" "dev.ratio" "nulldev" "npasses"
[9] "jerr" "offset" "call" "nobs"
lasso_fit$beta
16 x 86 sparse Matrix of class "dgCMatrix"
[[ suppressing 24 column names 㤼㸱s0㤼㸲, 㤼㸱s1㤼㸲, 㤼㸱s2㤼㸲 ... ]]
AtBat . . . . .
Hits . . . . .
HmRun . . . . .
Runs . . . . .
RBI . . . . .
Walks . . . . 0.42260876
Years . . . . .
CAtBat . . . . .
CHits . . . . .
CHmRun . . . . .
CRuns . 0.06240987 0.1192754 0.163584507 0.17835149
CRBI . . . 0.008777675 0.03423047
CWalks . . . . .
PutOuts . . . . .
Assists . . . . .
Errors . . . . .
AtBat . . . .
Hits . . . .
HmRun . . . .
Runs . . . .
RBI 0.16228472 0.42786994 0.66931872 0.89029423
Walks 0.79538881 1.06400873 1.30897548 1.53191160
Years . . . .
CAtBat . . . .
CHits . . . .
CHmRun . . . .
CRuns 0.19774937 0.22143421 0.24284138 0.26255145
CRBI 0.04687117 0.05048377 0.05395959 0.05689975
CWalks . . . .
PutOuts . . . .
Assists . . . .
Errors . . . .
AtBat . . . .
Hits 0.03697630 0.14632952 0.24557635 0.3359564
HmRun . . . .
Runs . . . .
RBI 1.03902389 1.06983242 1.09834960 1.1244394
Walks 1.72333892 1.87379938 2.01120489 2.1363864
Years . . . .
CAtBat . . . .
CHits . . . .
CHmRun . . . .
CRuns 0.27743892 0.28389686 0.28984590 0.2952909
CRBI 0.06340362 0.07788381 0.09100408 0.1029331
CWalks . . . .
PutOuts . . . .
Assists . . . .
Errors . . . .
AtBat . . . .
Hits 4.182715e-01 0.48796880 0.55102578 0.60829556
HmRun . . . .
Runs . . . .
RBI 1.148287e+00 1.13530377 1.12445017 1.11491682
Walks 2.250435e+00 2.31713006 2.37786099 2.43317040
Years . . . .
CAtBat . . . .
CHits . . . .
CHmRun . . . .
CRuns 3.002698e-01 0.30874210 0.31665975 0.32395493
CRBI 1.137840e-01 0.12076234 0.12691019 0.13242667
CWalks . . . .
PutOuts 4.700926e-05 0.01702935 0.03250521 0.04660859
Assists . . . .
Errors . . . .
AtBat . . . .
Hits 0.66111714 0.70874512 0.75185041 0.79083005
HmRun . . . .
Runs . . . .
RBI 1.10493956 1.09680037 1.08989609 1.08406353
Walks 2.48374849 2.52977530 2.57174452 2.61011062
Years . . . .
CAtBat . . . .
CHits . . . .
CHmRun . . . .
CRuns 0.33029598 0.33629310 0.34185825 0.34699744
CRBI 0.13777378 0.14241489 0.14653593 0.15021507
CWalks . . . .
PutOuts 0.05944956 0.07115614 0.08182506 0.09154679
Assists . . . .
Errors . . . .
AtBat . . . ......
Hits 0.8272377 0.8578160 0.891875055 ......
HmRun . . . ......
Runs . . . ......
RBI 1.0772358 1.0749446 1.060475030 ......
Walks 2.6449026 2.6778366 2.705721675 ......
Years . . . ......
CAtBat . . . ......
CHits . . . ......
CHmRun . . 0.004899165 ......
CRuns 0.3513853 0.3559261 0.357891758 ......
CRBI 0.1538835 0.1566182 0.160394055 ......
CWalks . . . ......
PutOuts 0.1003985 0.1084661 0.115806452 ......
Assists . . . ......
Errors . . . ......
.....suppressing 62 columns in show(); maybe adjust 'options(max.print= *, width = *)'
..............................
plot(lasso_fit, xvar = "lambda", label = T)
lasso_cv_fit <- cv.glmnet(x = as.matrix(train_x_num_lasso),
y = train_y,
alpha = 1)
lasso_cv_fit
Call: cv.glmnet(x = as.matrix(train_x_num_lasso), y = train_y, alpha = 1)
Measure: Mean-Squared Error
Lambda Index Measure SE Nonzero
min 22.67 26 130420 27515 7
1se 132.75 7 154497 27484 4
plot(lasso_cv_fit)
coef(lasso_cv_fit)
17 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) 366.38171437
AtBat .
Hits .
HmRun .
Runs .
RBI 0.42786994
Walks 1.06400873
Years .
CAtBat .
CHits .
CHmRun .
CRuns 0.22143421
CRBI 0.05048377
CWalks .
PutOuts .
Assists .
Errors .
tidy(lasso_cv_fit)
glance(lasso_cv_fit)
NA
caret::defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(lasso_cv_fit, as.matrix(test_x_num_lasso)))))
RMSE Rsquared MAE
444.8345589 0.5327518 324.4057579
ctr_lasso <- caret::trainControl(method = "cv", number = 10)
# Müəyyən lambda dəyərləri arasında parametrlərin axtarılması;
lasso_grid <- data.frame(
fraction = seq(.05, 1, length = 20))
set.seed(123)
lasso_tune <- caret::train(train_x_num_lasso, train_y,
method = "lasso",
trControl = ctr_lasso,
tuneGrid = lasso_grid,
preProc = c("center", "scale"))
lasso_tune
The lasso
212 samples
16 predictor
Pre-processing: centered (16), scaled (16)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 190, 190, 191, 192, 191, 191, ...
Resampling results across tuning parameters:
fraction RMSE Rsquared MAE
0.05 374.5882 0.3602778 283.8127
0.10 349.7210 0.3953406 249.4691
0.15 348.9075 0.4069313 241.1939
0.20 348.6838 0.4103589 241.1428
0.25 348.3881 0.4122798 241.3826
0.30 347.9900 0.4148782 241.4851
0.35 347.6034 0.4168635 241.4334
0.40 347.6243 0.4164536 241.9243
0.45 348.0957 0.4145211 242.8076
0.50 348.6302 0.4124820 243.5701
0.55 349.2625 0.4101801 244.0555
0.60 349.8635 0.4084392 244.4468
0.65 350.5985 0.4064595 244.8605
0.70 351.7086 0.4033787 245.5914
0.75 352.8230 0.4002894 246.4468
0.80 353.7748 0.3980474 247.3674
0.85 354.8747 0.3954635 248.2818
0.90 355.9661 0.3932887 249.1219
0.95 357.0984 0.3911352 249.8474
1.00 358.4258 0.3883199 250.6471
RMSE was used to select the optimal model using the
smallest value.
The final value used for the model was fraction = 0.35.
plot(lasso_tune)
# Ən yaxşı modelə sahib lambda dəyərinin seçilməsi;
lasso_tune$bestTune %>% dplyr::filter(
fraction == as.numeric(lasso_tune$bestTune))
# Modelin test xətası;
defaultSummary(data.frame(
obs = test_y,
pred = as.vector(predict(lasso_tune, as.matrix(test_x_num_lasso)))))
RMSE Rsquared MAE
305.8428674 0.7033515 221.2524652