Outline
Beberapa pemodelan dalam regresi nonlinier, yaitu:
1. Regresi Polinomial (Polynomial Regression)
2. Regresi Fungsi Tangga (Step Functions)
3. Basis Function Regression
4. Spline Regression
1. Piecewise Cubic Polynomial Function
2. Truncated Power Basis Function
3. Smoothing Spline
4. Natural Spline
5. LOESS Function
Packages yang digunakan dalam data yang dianalisis ini, sebagai berikut:
library(MultiKink)
library(tidyverse)
library(ggplot2)
library(dplyr)
library(purrr)
library(rsample)
library(mlr3measures)
library(splines)
library(ISLR)
Contoh Kuliah
Data Bangkitan & Plot
set.seed(123)
data.x <- rnorm(1000,1,1)
err <- rnorm(1000,0,5)
y <- 5+2*data.x+3*data.x^2+err
#Model polinomial ordo 2
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="purple")
abline(v=1, col="red", lty=2)
Pertemuan 8
Regresi Linear
#Model dengan nilai dugaan y
lin.mod <-lm( y~data.x)
plot( data.x,y,xlim=c( -2,5), ylim=c( -10,70))
lines(data.x,lin.mod$fitted.values,col="purple")
summary(lin.mod)
##
## Call:
## lm(formula = y ~ data.x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.6384 -4.2679 -0.4316 3.8259 27.1070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4176 0.2919 15.14 <2e-16 ***
## data.x 8.7312 0.2056 42.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.445 on 998 degrees of freedom
## Multiple R-squared: 0.6437, Adjusted R-squared: 0.6434
## F-statistic: 1803 on 1 and 998 DF, p-value: < 2.2e-16
Regresi Polimonial
pol.mod <- lm( y~data.x+I(data.x^2))
ix <- sort( data.x,index.return=T)$ix #indeks data yang berurutan berdasarkan nilai
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70))
lines(data.x[ix], pol.mod$fitted.values[ix],col="blue", cex=2)
summary(pol.mod)
##
## Call:
## lm(formula = y ~ data.x + I(data.x^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.1593 -3.4709 0.0247 3.5581 16.4277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.7597 0.2284 20.841 <2e-16 ***
## data.x 2.5366 0.2930 8.656 <2e-16 ***
## I(data.x^2) 2.9540 0.1169 25.272 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.034 on 997 degrees of freedom
## Multiple R-squared: 0.7828, Adjusted R-squared: 0.7824
## F-statistic: 1797 on 2 and 997 DF, p-value: < 2.2e-16
Regresi Fungsi Tangga
range( data.x)
## [1] -1.809775 4.241040
#Fungsi tangga secara manual
c1 <- as.factor(ifelse(data.x<=0,1,0))
c2 <- as.factor(ifelse(data.x<=2 & data.x>0,1,0))
c3 <- as.factor(ifelse(data.x>2,1,0))
data.frame(y, c1, c2, c3)
## y c1 c2 c3
## 1 1.47960005 0 1 0
## 2 3.11774990 0 1 0
## 3 29.66848014 0 0 1
## 4 9.91810577 0 1 0
## 5 -1.66226604 0 1 0
## 6 37.74773090 0 0 1
## 7 15.57328957 0 1 0
## 8 16.76168677 1 0 0
## 9 9.34646890 0 1 0
## 10 4.79575147 0 1 0
## 11 38.27473885 0 0 1
## 12 27.42803871 0 1 0
## 13 7.59446579 0 1 0
## 14 13.26737350 0 1 0
## 15 5.42391443 0 1 0
## 16 34.80973650 0 0 1
## 17 15.86408276 0 1 0
## 18 -0.43969045 1 0 0
## 19 18.51449542 0 1 0
## 20 15.63450070 0 1 0
## 21 4.05770273 1 0 0
## 22 7.58410632 0 1 0
## 23 11.94287983 1 0 0
## 24 10.25469864 0 1 0
## 25 -2.07076598 0 1 0
## 26 6.18404135 1 0 0
## 27 27.07569393 0 1 0
## 28 18.37393663 0 1 0
## 29 6.88072959 1 0 0
## 30 28.35277897 0 0 1
## 31 7.97265291 0 1 0
## 32 9.40128751 0 1 0
## 33 14.79251046 0 1 0
## 34 17.04833283 0 1 0
## 35 23.27565349 0 1 0
## 36 11.24733267 0 1 0
## 37 16.68640678 0 1 0
## 38 11.65786569 0 1 0
## 39 8.10769799 0 1 0
## 40 16.50145065 0 1 0
## 41 0.77846089 0 1 0
## 42 11.49700187 0 1 0
## 43 4.23586013 1 0 0
## 44 40.16059644 0 0 1
## 45 26.36166872 0 0 1
## 46 -0.30275295 1 0 0
## 47 0.69661531 0 1 0
## 48 4.44765445 0 1 0
## 49 26.82354345 0 1 0
## 50 9.63272255 0 1 0
## 51 13.87623091 0 1 0
## 52 8.82483752 0 1 0
## 53 12.01501363 0 1 0
## 54 21.80963719 0 0 1
## 55 14.13630206 0 1 0
## 56 31.95434042 0 0 1
## 57 0.77361920 1 0 0
## 58 15.97499593 0 1 0
## 59 14.61851167 0 1 0
## 60 14.65607974 0 1 0
## 61 20.87916435 0 1 0
## 62 3.67346016 0 1 0
## 63 13.24810549 0 1 0
## 64 10.14662442 1 0 0
## 65 4.05946383 1 0 0
## 66 7.82498458 0 1 0
## 67 8.74262829 0 1 0
## 68 12.72139695 0 1 0
## 69 19.57423792 0 1 0
## 70 47.90473247 0 0 1
## 71 9.47077529 0 1 0
## 72 5.66370731 1 0 0
## 73 15.95272689 0 0 1
## 74 2.92328269 0 1 0
## 75 7.63044060 0 1 0
## 76 19.10528763 0 0 1
## 77 10.53625354 0 1 0
## 78 3.03302324 1 0 0
## 79 11.02124115 0 1 0
## 80 5.29419317 0 1 0
## 81 19.57143109 0 1 0
## 82 15.19067483 0 1 0
## 83 8.60005453 0 1 0
## 84 7.94136372 0 1 0
## 85 11.68090998 0 1 0
## 86 7.86637558 0 1 0
## 87 17.92627170 0 0 1
## 88 18.64130657 0 1 0
## 89 5.44773828 0 1 0
## 90 14.40787638 0 0 1
## 91 29.75870113 0 1 0
## 92 3.40235872 0 1 0
## 93 14.94489005 0 1 0
## 94 11.24579576 0 1 0
## 95 23.28450551 0 0 1
## 96 8.50029328 0 1 0
## 97 44.04759275 0 0 1
## 98 34.51068677 0 0 1
## 99 10.70155804 0 1 0
## 100 3.72483350 1 0 0
## 101 10.41074024 0 1 0
## 102 16.25614918 0 1 0
## 103 3.52619045 0 1 0
## 104 0.57807961 0 1 0
## 105 5.90517285 0 1 0
## 106 8.27604882 0 1 0
## 107 0.64129376 0 1 0
## 108 5.42220882 1 0 0
## 109 0.79192183 0 1 0
## 110 20.69176893 0 1 0
## 111 3.26565474 0 1 0
## 112 20.75839780 0 1 0
## 113 17.03201741 1 0 0
## 114 4.98486945 0 1 0
## 115 20.25293002 0 1 0
## 116 16.80705558 0 1 0
## 117 10.52794081 0 1 0
## 118 3.83763263 0 1 0
## 119 13.24489619 0 1 0
## 120 -5.07380008 1 0 0
## 121 7.76672098 0 1 0
## 122 -2.07088970 0 1 0
## 123 13.77404746 0 1 0
## 124 7.19449493 0 1 0
## 125 32.32702133 0 0 1
## 126 21.97973918 0 1 0
## 127 11.79912675 0 1 0
## 128 8.42317392 0 1 0
## 129 6.57997773 0 1 0
## 130 1.60266674 0 1 0
## 131 30.26810163 0 0 1
## 132 13.74278415 0 1 0
## 133 12.67758945 0 1 0
## 134 2.24368288 0 1 0
## 135 1.10657548 1 0 0
## 136 19.42339606 0 0 1
## 137 0.87533982 1 0 0
## 138 24.05739688 0 1 0
## 139 44.10258568 0 0 1
## 140 3.91887733 1 0 0
## 141 15.29709564 0 1 0
## 142 6.46346850 0 1 0
## 143 5.18394089 1 0 0
## 144 5.24983424 1 0 0
## 145 6.33263690 1 0 0
## 146 2.86493832 0 1 0
## 147 0.48166155 1 0 0
## 148 22.90841096 0 1 0
## 149 37.28910740 0 0 1
## 150 6.18832701 1 0 0
## 151 17.87865559 0 1 0
## 152 13.13736887 0 1 0
## 153 15.94400584 0 1 0
## 154 5.84898165 1 0 0
## 155 16.08610270 0 1 0
## 156 8.57999987 0 1 0
## 157 13.79705912 0 1 0
## 158 8.82809640 0 1 0
## 159 14.75125984 0 1 0
## 160 3.24481533 0 1 0
## 161 24.29766227 0 0 1
## 162 3.24329661 1 0 0
## 163 4.35292703 1 0 0
## 164 66.86523051 0 0 1
## 165 3.93388695 0 1 0
## 166 2.55919653 0 1 0
## 167 18.05239509 0 1 0
## 168 10.64008361 0 1 0
## 169 8.49275430 0 1 0
## 170 20.77213428 0 1 0
## 171 10.34189635 0 1 0
## 172 17.24333528 0 1 0
## 173 4.94509135 0 1 0
## 174 41.45244409 0 0 1
## 175 5.21797906 0 1 0
## 176 8.67819048 1 0 0
## 177 7.42729324 0 1 0
## 178 12.72255252 0 1 0
## 179 5.17055033 0 1 0
## 180 3.07526646 0 1 0
## 181 5.51054775 1 0 0
## 182 21.36078431 0 0 1
## 183 7.35171565 0 1 0
## 184 2.98360478 0 1 0
## 185 11.31189823 0 1 0
## 186 14.38166939 0 1 0
## 187 18.46262440 0 0 1
## 188 9.16425677 0 1 0
## 189 24.93702791 0 1 0
## 190 -4.24107485 0 1 0
## 191 10.25433389 0 1 0
## 192 18.04229695 0 1 0
## 193 21.75145670 0 1 0
## 194 6.02509638 0 1 0
## 195 0.35014496 1 0 0
## 196 38.77157809 0 0 1
## 197 12.62435188 0 1 0
## 198 11.95095558 1 0 0
## 199 2.19883072 0 1 0
## 200 6.59680639 1 0 0
## 201 45.19403401 0 0 1
## 202 21.87903647 0 0 1
## 203 12.34736866 0 1 0
## 204 11.49108198 0 1 0
## 205 10.35151234 0 1 0
## 206 12.35376632 0 1 0
## 207 0.61464599 0 1 0
## 208 11.84374615 0 1 0
## 209 28.93608178 0 0 1
## 210 11.04829906 0 1 0
## 211 12.00580753 0 1 0
## 212 9.99166820 0 1 0
## 213 25.75731175 0 0 1
## 214 0.51830036 0 1 0
## 215 4.33446977 0 1 0
## 216 35.95841025 0 0 1
## 217 -3.81605403 0 1 0
## 218 -1.65568551 0 1 0
## 219 -1.11475907 1 0 0
## 220 -3.27169079 1 0 0
## 221 8.49446403 0 1 0
## 222 11.12496420 0 1 0
## 223 11.75133836 0 0 1
## 224 13.97490684 0 1 0
## 225 5.53430526 0 1 0
## 226 14.77263703 0 1 0
## 227 0.33383614 0 1 0
## 228 11.61190666 0 1 0
## 229 21.41683709 0 1 0
## 230 6.78130502 1 0 0
## 231 32.84924685 0 0 1
## 232 19.07025553 0 1 0
## 233 11.03303718 0 1 0
## 234 -3.39644005 0 1 0
## 235 5.37537523 0 1 0
## 236 -5.00475493 1 0 0
## 237 7.08443141 0 1 0
## 238 11.76736299 0 1 0
## 239 16.31910313 0 1 0
## 240 10.62758687 0 1 0
## 241 1.92627325 0 1 0
## 242 10.76956525 0 1 0
## 243 35.80469349 0 0 1
## 244 0.86122959 1 0 0
## 245 5.40174383 0 1 0
## 246 39.32972789 0 0 1
## 247 10.13802855 0 1 0
## 248 7.41264939 1 0 0
## 249 13.03102120 0 1 0
## 250 16.52610960 0 1 0
## 251 12.67691593 0 1 0
## 252 9.56663155 0 1 0
## 253 9.77160127 0 1 0
## 254 12.67896430 0 1 0
## 255 36.91037757 0 0 1
## 256 4.30371107 0 1 0
## 257 16.62486498 0 0 1
## 258 19.19931521 0 1 0
## 259 8.53117459 0 1 0
## 260 5.15617569 1 0 0
## 261 10.35213812 0 1 0
## 262 10.56743080 0 1 0
## 263 9.07055357 0 1 0
## 264 23.91041988 0 0 1
## 265 44.48725838 0 0 1
## 266 34.88075882 0 0 1
## 267 11.11808226 0 1 0
## 268 12.36898386 1 0 0
## 269 7.30502608 0 1 0
## 270 16.36586960 0 1 0
## 271 23.31725652 0 1 0
## 272 23.54002132 0 1 0
## 273 18.95281715 0 1 0
## 274 3.27876475 1 0 0
## 275 18.41763673 0 1 0
## 276 8.17275979 0 1 0
## 277 11.73453401 0 1 0
## 278 15.32920539 0 1 0
## 279 13.42872884 0 1 0
## 280 9.84734182 0 1 0
## 281 2.58002338 1 0 0
## 282 16.82756499 0 1 0
## 283 13.19143387 0 1 0
## 284 -3.48217951 0 1 0
## 285 4.16287198 0 1 0
## 286 10.76377369 0 1 0
## 287 10.58828569 0 1 0
## 288 25.19955111 0 0 1
## 289 -1.56614471 0 1 0
## 290 9.65741489 0 1 0
## 291 26.70991244 0 0 1
## 292 30.63353424 0 0 1
## 293 22.90470369 0 0 1
## 294 13.84658807 0 1 0
## 295 38.46478389 0 0 1
## 296 11.12461451 0 1 0
## 297 37.01434875 0 0 1
## 298 0.31470649 1 0 0
## 299 9.91059856 0 1 0
## 300 29.22841437 0 0 1
## 301 2.06414773 0 1 0
## 302 4.07007995 0 1 0
## 303 -0.60459869 0 1 0
## 304 6.67500736 1 0 0
## 305 9.20004802 0 1 0
## 306 16.22020907 0 1 0
## 307 -0.04160235 1 0 0
## 308 12.36682575 0 1 0
## 309 19.76120742 0 0 1
## 310 38.75379254 0 0 1
## 311 32.20170443 0 0 1
## 312 15.25469581 0 1 0
## 313 8.71436703 1 0 0
## 314 2.52503437 0 1 0
## 315 5.16279742 0 1 0
## 316 19.30663762 0 1 0
## 317 5.79256727 0 1 0
## 318 -3.83142409 1 0 0
## 319 38.31329880 0 0 1
## 320 21.58481878 0 1 0
## 321 9.15036438 0 1 0
## 322 14.22584071 0 0 1
## 323 14.17780434 1 0 0
## 324 33.54846689 0 1 0
## 325 7.67441509 0 1 0
## 326 21.12152123 0 1 0
## 327 15.64732385 0 1 0
## 328 12.75658133 0 1 0
## 329 24.27898167 0 0 1
## 330 1.42287498 0 1 0
## 331 17.56385467 0 0 1
## 332 7.84170300 1 0 0
## 333 4.33572344 0 1 0
## 334 28.00675138 0 0 1
## 335 12.88357107 0 1 0
## 336 -3.10126196 1 0 0
## 337 8.85764457 1 0 0
## 338 1.29722648 0 1 0
## 339 18.13210635 0 1 0
## 340 7.02325724 0 1 0
## 341 15.06936069 0 1 0
## 342 27.73094669 0 1 0
## 343 28.83541176 0 0 1
## 344 6.54205951 0 1 0
## 345 1.99398113 0 1 0
## 346 1.16713443 1 0 0
## 347 4.99329513 0 1 0
## 348 8.44596798 0 1 0
## 349 0.73898611 0 1 0
## 350 10.17805908 0 1 0
## 351 20.47773687 0 0 1
## 352 -1.19665919 1 0 0
## 353 3.17642954 0 1 0
## 354 15.39947331 0 1 0
## 355 9.76317916 0 1 0
## 356 23.03356344 0 1 0
## 357 13.78136156 0 1 0
## 358 2.25985082 0 1 0
## 359 4.67269102 1 0 0
## 360 52.45129868 0 0 1
## 361 17.98473097 0 1 0
## 362 17.03174994 0 1 0
## 363 18.11691033 0 1 0
## 364 25.18765812 0 0 1
## 365 12.70651837 0 1 0
## 366 7.59813089 0 1 0
## 367 19.98068245 0 1 0
## 368 9.49860347 0 1 0
## 369 21.37731593 0 1 0
## 370 9.33493357 0 1 0
## 371 44.39954066 0 0 1
## 372 -1.62743036 1 0 0
## 373 13.41108350 0 1 0
## 374 11.54569720 0 1 0
## 375 10.16728642 0 1 0
## 376 9.47144034 0 1 0
## 377 -1.30450314 0 1 0
## 378 8.45907326 0 1 0
## 379 3.97215727 0 1 0
## 380 8.39638862 1 0 0
## 381 12.51155495 0 1 0
## 382 13.73982871 0 1 0
## 383 13.70172298 0 1 0
## 384 7.68527527 1 0 0
## 385 17.68595738 0 1 0
## 386 16.88662018 0 0 1
## 387 13.53026439 0 1 0
## 388 5.34629482 1 0 0
## 389 10.70757818 0 1 0
## 390 7.19076172 0 1 0
## 391 3.58163815 0 1 0
## 392 1.33692749 1 0 0
## 393 24.70590616 0 1 0
## 394 11.82364135 0 1 0
## 395 10.33681561 0 1 0
## 396 22.70433908 0 0 1
## 397 7.86450226 0 1 0
## 398 24.66388377 0 1 0
## 399 5.52220584 1 0 0
## 400 4.71021506 0 1 0
## 401 3.99719211 0 1 0
## 402 1.42151308 1 0 0
## 403 9.70497174 0 1 0
## 404 7.61345758 0 1 0
## 405 17.85314170 0 1 0
## 406 11.44326831 1 0 0
## 407 10.46062487 0 1 0
## 408 24.59106752 0 1 0
## 409 -1.94742636 0 1 0
## 410 10.56993883 0 1 0
## 411 14.99003555 0 1 0
## 412 15.25083337 0 1 0
## 413 10.66026512 0 1 0
## 414 13.09442724 0 1 0
## 415 8.74092305 0 1 0
## 416 11.13254760 1 0 0
## 417 11.82480991 0 1 0
## 418 13.41592013 0 1 0
## 419 19.77087698 0 1 0
## 420 9.72392420 0 1 0
## 421 32.98486823 0 0 1
## 422 17.73939730 0 1 0
## 423 12.52118738 0 1 0
## 424 28.37787951 0 0 1
## 425 2.82996315 0 1 0
## 426 14.90786482 0 1 0
## 427 46.40301030 0 0 1
## 428 14.32861892 0 1 0
## 429 30.57360210 0 0 1
## 430 3.30170141 1 0 0
## 431 12.50694677 0 1 0
## 432 5.53392196 0 1 0
## 433 15.06220849 0 1 0
## 434 6.95665465 1 0 0
## 435 -3.32145989 0 1 0
## 436 6.70502721 1 0 0
## 437 6.38337595 0 1 0
## 438 25.50743359 0 0 1
## 439 4.78224051 1 0 0
## 440 20.99167857 0 0 1
## 441 4.65798684 1 0 0
## 442 21.12385964 0 1 0
## 443 13.40902019 0 1 0
## 444 11.31126568 0 1 0
## 445 7.79339374 0 1 0
## 446 5.50696814 0 1 0
## 447 5.38312508 0 1 0
## 448 9.44416532 0 1 0
## 449 8.97393647 1 0 0
## 450 4.76846687 1 0 0
## 451 36.83422982 0 0 1
## 452 24.52770215 0 0 1
## 453 18.29922045 0 1 0
## 454 23.92778565 0 1 0
## 455 17.35743334 0 1 0
## 456 12.50463767 1 0 0
## 457 26.92432882 0 0 1
## 458 13.67249657 0 1 0
## 459 15.81774274 0 1 0
## 460 10.00582784 0 1 0
## 461 14.91556804 0 1 0
## 462 11.22432136 0 1 0
## 463 17.45589795 0 1 0
## 464 10.50367570 0 1 0
## 465 6.95847921 1 0 0
## 466 23.06327017 0 0 1
## 467 17.18966908 0 1 0
## 468 36.15015929 0 0 1
## 469 11.39212410 0 1 0
## 470 18.64637263 0 0 1
## 471 36.05939815 0 0 1
## 472 1.39021100 0 1 0
## 473 -0.16816948 1 0 0
## 474 7.53399963 0 1 0
## 475 -0.56555174 0 1 0
## 476 2.83975259 0 1 0
## 477 38.00723082 0 0 1
## 478 4.82686375 0 1 0
## 479 4.14297602 0 1 0
## 480 7.76973138 0 1 0
## 481 16.77020670 0 1 0
## 482 16.11962944 0 1 0
## 483 28.12509640 0 0 1
## 484 16.93343103 0 1 0
## 485 13.36966387 0 1 0
## 486 21.69919503 0 1 0
## 487 16.89318850 0 1 0
## 488 6.39845182 0 1 0
## 489 42.02760377 0 0 1
## 490 12.23490218 0 1 0
## 491 13.02733610 0 1 0
## 492 21.60483405 0 0 1
## 493 21.44545112 0 0 1
## 494 16.26489427 1 0 0
## 495 -0.16431291 0 1 0
## 496 5.74090060 1 0 0
## 497 16.01683804 0 1 0
## 498 16.49403434 0 1 0
## 499 18.75621607 0 1 0
## 500 14.51625119 0 1 0
## 501 2.16674874 0 1 0
## 502 3.47643578 0 1 0
## 503 16.86665306 0 0 1
## 504 20.83611338 0 1 0
## 505 10.36119376 1 0 0
## 506 19.90204727 0 1 0
## 507 7.07115684 0 1 0
## 508 1.92411454 1 0 0
## 509 16.39094353 0 1 0
## 510 13.90892428 0 1 0
## 511 8.05824470 0 1 0
## 512 4.08011158 0 1 0
## 513 23.20250509 0 1 0
## 514 17.41731302 0 1 0
## 515 4.73295355 0 1 0
## 516 8.42260990 0 1 0
## 517 7.61824121 0 1 0
## 518 16.18004371 0 1 0
## 519 4.33890931 1 0 0
## 520 6.90405901 0 1 0
## 521 21.10305843 0 1 0
## 522 7.75190643 0 1 0
## 523 2.92092312 0 1 0
## 524 6.44119120 0 1 0
## 525 22.60375174 0 0 1
## 526 16.34547247 0 1 0
## 527 32.14362561 0 0 1
## 528 4.38083834 0 1 0
## 529 12.78908248 0 1 0
## 530 9.62558271 0 1 0
## 531 24.75249081 0 1 0
## 532 8.84852444 0 1 0
## 533 4.62072794 1 0 0
## 534 9.53530578 0 1 0
## 535 38.99736145 0 0 1
## 536 23.27699566 0 1 0
## 537 22.47618196 0 0 1
## 538 14.07598511 0 1 0
## 539 7.32724981 0 1 0
## 540 12.15116542 0 1 0
## 541 13.59491076 0 1 0
## 542 8.30524430 0 1 0
## 543 16.74003960 0 1 0
## 544 11.69544571 1 0 0
## 545 20.00537047 0 1 0
## 546 27.04275834 0 1 0
## 547 4.41149452 1 0 0
## 548 19.41240085 0 1 0
## 549 39.42937499 0 0 1
## 550 5.91178362 0 1 0
## 551 18.79185076 0 1 0
## 552 1.78617733 0 1 0
## 553 17.40726649 0 0 1
## 554 16.92663144 0 1 0
## 555 35.97259019 0 0 1
## 556 -1.77963904 1 0 0
## 557 11.71940531 0 1 0
## 558 1.05482241 0 1 0
## 559 3.28325520 0 1 0
## 560 8.77954048 0 1 0
## 561 1.98501671 0 1 0
## 562 20.60193767 0 1 0
## 563 5.04046771 1 0 0
## 564 31.16003575 0 0 1
## 565 2.11347202 1 0 0
## 566 5.08317752 0 1 0
## 567 19.68990824 0 1 0
## 568 16.91701524 0 1 0
## 569 6.66384104 0 1 0
## 570 11.00115430 0 1 0
## 571 27.09174124 0 1 0
## 572 1.22131839 1 0 0
## 573 1.83239102 0 1 0
## 574 9.77311618 0 1 0
## 575 -0.88971914 0 1 0
## 576 30.68890547 0 0 1
## 577 17.70769143 0 1 0
## 578 11.62881801 0 1 0
## 579 4.54120107 1 0 0
## 580 7.66004326 0 1 0
## 581 3.65193699 0 1 0
## 582 13.47193433 0 1 0
## 583 -2.39515056 1 0 0
## 584 16.93673859 0 1 0
## 585 0.96358168 1 0 0
## 586 9.75530438 0 1 0
## 587 9.78441101 0 1 0
## 588 2.95685625 0 1 0
## 589 24.53039777 0 1 0
## 590 8.00892879 1 0 0
## 591 0.48181869 1 0 0
## 592 15.00039170 0 1 0
## 593 16.58477554 0 1 0
## 594 2.27522849 0 1 0
## 595 15.84436933 0 1 0
## 596 5.41092442 1 0 0
## 597 7.38877859 0 1 0
## 598 15.63985164 1 0 0
## 599 12.39156169 0 0 1
## 600 39.44818907 0 0 1
## 601 20.60748874 0 0 1
## 602 13.06603489 0 1 0
## 603 7.46670149 0 1 0
## 604 1.79751932 1 0 0
## 605 9.64531139 0 1 0
## 606 7.40011110 0 1 0
## 607 18.43371941 0 1 0
## 608 9.63375522 1 0 0
## 609 16.24933347 0 1 0
## 610 9.94505194 0 1 0
## 611 13.01796908 0 1 0
## 612 9.22123136 1 0 0
## 613 34.42155281 0 0 1
## 614 6.96010415 0 1 0
## 615 19.40140769 0 0 1
## 616 1.48872618 1 0 0
## 617 6.96149361 0 1 0
## 618 10.46174903 0 1 0
## 619 9.86284867 1 0 0
## 620 30.02453638 0 0 1
## 621 1.94867529 1 0 0
## 622 29.30382637 0 1 0
## 623 22.60428110 0 1 0
## 624 18.62365326 0 1 0
## 625 3.60776943 0 1 0
## 626 19.99691755 0 1 0
## 627 13.69504102 0 1 0
## 628 13.26771170 1 0 0
## 629 3.11827537 1 0 0
## 630 44.02498676 0 0 1
## 631 -4.52099878 0 1 0
## 632 2.94859309 1 0 0
## 633 8.65912616 0 1 0
## 634 18.33738407 0 1 0
## 635 3.84566210 1 0 0
## 636 3.82971716 1 0 0
## 637 15.95565792 0 1 0
## 638 28.59120635 0 0 1
## 639 11.81284763 0 1 0
## 640 8.92479496 0 1 0
## 641 -0.72811868 0 1 0
## 642 10.01089049 0 1 0
## 643 19.41599519 0 1 0
## 644 23.47794438 0 1 0
## 645 12.84867823 0 1 0
## 646 16.76682613 0 1 0
## 647 -3.98290619 0 1 0
## 648 1.80493088 0 1 0
## 649 22.33571729 0 1 0
## 650 19.40706480 0 1 0
## 651 -5.04767691 1 0 0
## 652 9.99105517 0 1 0
## 653 23.03326385 0 1 0
## 654 7.92542226 1 0 0
## 655 10.43171498 0 1 0
## 656 -2.29735956 0 1 0
## 657 17.15203660 0 0 1
## 658 15.35038331 0 1 0
## 659 22.36711116 0 1 0
## 660 1.30977333 0 1 0
## 661 24.26286210 0 1 0
## 662 26.78886595 0 0 1
## 663 40.17727275 0 0 1
## 664 1.50898058 0 1 0
## 665 3.84901180 1 0 0
## 666 8.01401876 0 1 0
## 667 16.99071264 0 1 0
## 668 13.97050399 0 1 0
## 669 17.69061713 0 1 0
## 670 29.17089447 0 0 1
## 671 4.08400195 0 1 0
## 672 12.96939954 0 1 0
## 673 13.47787279 0 1 0
## 674 3.16973808 0 1 0
## 675 15.37202847 0 1 0
## 676 12.75673097 1 0 0
## 677 9.19458047 1 0 0
## 678 2.11974273 1 0 0
## 679 3.00809250 0 1 0
## 680 12.78395652 0 1 0
## 681 15.26104484 0 1 0
## 682 24.89841177 0 1 0
## 683 4.07468224 0 1 0
## 684 11.31152669 0 1 0
## 685 0.87806044 1 0 0
## 686 10.32569923 0 1 0
## 687 4.17329731 0 1 0
## 688 2.25772294 0 1 0
## 689 1.02436267 0 1 0
## 690 7.01391477 1 0 0
## 691 13.70822022 0 1 0
## 692 14.96922164 0 0 1
## 693 17.53542855 0 0 1
## 694 14.37841312 0 1 0
## 695 2.63512759 1 0 0
## 696 50.31916874 0 0 1
## 697 6.14509052 0 1 0
## 698 7.75596581 0 1 0
## 699 14.33146249 0 1 0
## 700 4.95786366 1 0 0
## 701 4.80235528 0 1 0
## 702 2.44636674 1 0 0
## 703 -9.34292093 0 1 0
## 704 20.33644853 0 1 0
## 705 13.62174109 1 0 0
## 706 10.25869141 0 1 0
## 707 11.72537452 0 1 0
## 708 12.09547076 0 1 0
## 709 16.95571859 0 1 0
## 710 24.04653367 0 1 0
## 711 16.50779684 0 0 1
## 712 21.31946742 0 1 0
## 713 3.60990940 0 1 0
## 714 13.09842665 0 1 0
## 715 28.47150993 0 0 1
## 716 20.77938384 0 0 1
## 717 16.86681148 0 1 0
## 718 2.99198374 0 1 0
## 719 21.51715796 0 1 0
## 720 14.16181806 0 1 0
## 721 6.21795315 1 0 0
## 722 6.18283754 1 0 0
## 723 14.49462381 0 1 0
## 724 -0.88821837 1 0 0
## 725 11.28210956 0 1 0
## 726 1.72165724 0 1 0
## 727 8.37194371 0 1 0
## 728 6.24107332 0 1 0
## 729 0.35499934 0 1 0
## 730 21.34562379 0 1 0
## 731 40.97005000 0 0 1
## 732 11.80223803 0 1 0
## 733 7.95250937 1 0 0
## 734 31.57077375 0 0 1
## 735 4.61331823 0 1 0
## 736 14.76192223 0 1 0
## 737 20.25509644 0 1 0
## 738 7.97701049 0 1 0
## 739 8.31146633 0 1 0
## 740 22.80793391 0 0 1
## 741 16.91188100 0 1 0
## 742 18.08569147 0 1 0
## 743 45.40535737 0 0 1
## 744 26.05387119 0 1 0
## 745 16.15641349 0 1 0
## 746 2.40264600 1 0 0
## 747 47.04509674 0 0 1
## 748 5.03760497 0 1 0
## 749 52.55984613 0 0 1
## 750 14.88161755 0 1 0
## 751 25.90021784 0 0 1
## 752 13.56959933 0 1 0
## 753 14.20972109 0 1 0
## 754 6.24560539 0 1 0
## 755 10.92091942 0 1 0
## 756 16.70104747 0 1 0
## 757 23.35251635 0 0 1
## 758 9.47009283 0 1 0
## 759 2.87377425 1 0 0
## 760 11.38999646 0 1 0
## 761 9.83959386 0 1 0
## 762 7.80595356 0 1 0
## 763 15.85624632 0 1 0
## 764 4.87984297 1 0 0
## 765 25.18312624 0 1 0
## 766 14.47139149 0 1 0
## 767 25.95612046 0 0 1
## 768 12.37739802 0 1 0
## 769 12.98866804 0 1 0
## 770 28.49006447 0 0 1
## 771 1.28624682 1 0 0
## 772 6.74890121 0 1 0
## 773 23.98629126 0 1 0
## 774 7.32308079 1 0 0
## 775 2.24921276 0 1 0
## 776 42.71932766 0 0 1
## 777 12.67007754 0 1 0
## 778 7.83711887 0 1 0
## 779 14.38659902 0 1 0
## 780 7.25211786 0 1 0
## 781 19.77486090 0 1 0
## 782 17.17015958 0 1 0
## 783 11.40661128 0 1 0
## 784 0.08349592 1 0 0
## 785 5.66808622 1 0 0
## 786 11.84514581 0 1 0
## 787 20.83252156 0 1 0
## 788 11.00680950 1 0 0
## 789 0.12972273 1 0 0
## 790 6.32600950 0 1 0
## 791 16.70805219 0 1 0
## 792 -0.14474395 0 1 0
## 793 27.28203555 0 1 0
## 794 7.82632632 0 1 0
## 795 9.54483023 0 1 0
## 796 19.32122530 0 1 0
## 797 14.16775531 0 1 0
## 798 5.97697101 0 1 0
## 799 14.25884575 0 1 0
## 800 11.28210505 0 1 0
## 801 6.78425928 0 1 0
## 802 2.76200745 0 1 0
## 803 18.74910956 0 1 0
## 804 29.49501473 0 0 1
## 805 20.37645004 0 1 0
## 806 12.81254307 0 1 0
## 807 11.32011493 0 1 0
## 808 40.08825773 0 0 1
## 809 12.67984050 0 1 0
## 810 1.78608946 0 1 0
## 811 10.25528133 1 0 0
## 812 2.15029898 1 0 0
## 813 11.07010764 0 1 0
## 814 10.04811736 0 1 0
## 815 9.13687607 0 1 0
## 816 17.50111753 0 1 0
## 817 6.72534165 0 1 0
## 818 29.61527921 0 0 1
## 819 6.22148752 0 1 0
## 820 6.87591679 1 0 0
## 821 -1.50493628 1 0 0
## 822 25.07549936 0 1 0
## 823 10.15411982 0 1 0
## 824 13.90857341 0 1 0
## 825 9.61858204 0 1 0
## 826 10.66804211 0 1 0
## 827 0.54184457 0 1 0
## 828 13.92616755 1 0 0
## 829 3.95692396 1 0 0
## 830 10.30796850 1 0 0
## 831 22.13067113 0 0 1
## 832 4.62965476 0 1 0
## 833 9.01081917 0 1 0
## 834 23.53029496 0 0 1
## 835 8.36943276 0 1 0
## 836 16.65205568 0 1 0
## 837 -4.21822105 1 0 0
## 838 12.33850796 0 1 0
## 839 17.45415144 0 1 0
## 840 6.94447287 0 1 0
## 841 8.01637437 0 1 0
## 842 47.93607946 0 0 1
## 843 6.33724355 0 1 0
## 844 15.16964385 0 1 0
## 845 20.80672159 0 1 0
## 846 5.54800881 0 1 0
## 847 4.16081558 0 1 0
## 848 16.75961925 0 0 1
## 849 32.45204096 0 0 1
## 850 1.96971761 0 1 0
## 851 2.29994526 0 1 0
## 852 16.59822901 0 1 0
## 853 32.59854994 0 0 1
## 854 2.48566228 1 0 0
## 855 48.53157077 0 0 1
## 856 6.22698600 0 1 0
## 857 12.08549853 1 0 0
## 858 10.33280701 0 1 0
## 859 12.25655784 0 1 0
## 860 21.90172488 0 0 1
## 861 25.19536504 0 1 0
## 862 9.44628260 0 1 0
## 863 -2.34132523 1 0 0
## 864 4.31277884 0 1 0
## 865 8.13981565 0 1 0
## 866 55.27407028 0 0 1
## 867 -0.33746087 1 0 0
## 868 20.09807295 0 0 1
## 869 14.05665347 0 1 0
## 870 38.44783308 0 0 1
## 871 10.59580189 1 0 0
## 872 19.51848850 0 1 0
## 873 5.85078419 1 0 0
## 874 16.76773877 0 1 0
## 875 31.25106093 0 0 1
## 876 42.31532101 0 0 1
## 877 30.35747205 0 1 0
## 878 25.11410983 0 0 1
## 879 25.99828746 0 0 1
## 880 12.27114660 0 1 0
## 881 14.22599821 0 1 0
## 882 -2.73074669 0 1 0
## 883 12.32878664 0 1 0
## 884 20.34453809 0 0 1
## 885 17.73854621 0 1 0
## 886 18.82414437 0 1 0
## 887 11.34788490 1 0 0
## 888 -1.52911609 0 1 0
## 889 11.04572820 0 1 0
## 890 9.84623665 0 1 0
## 891 -1.42584231 0 1 0
## 892 22.01560602 0 1 0
## 893 10.53912539 1 0 0
## 894 33.44589824 0 0 1
## 895 18.11886750 0 1 0
## 896 1.56456059 0 1 0
## 897 11.34751899 0 1 0
## 898 7.08442801 0 1 0
## 899 11.97873583 0 1 0
## 900 15.79667999 0 1 0
## 901 12.65900696 1 0 0
## 902 3.26916662 0 1 0
## 903 12.50272634 0 1 0
## 904 39.35567549 0 0 1
## 905 20.56090838 0 0 1
## 906 -6.84647705 0 1 0
## 907 21.22374806 0 1 0
## 908 5.18618978 0 1 0
## 909 7.69018693 0 1 0
## 910 19.39034031 0 1 0
## 911 62.74241260 0 0 1
## 912 -0.46649450 1 0 0
## 913 15.99303923 0 1 0
## 914 4.79439994 1 0 0
## 915 16.47144393 0 1 0
## 916 28.49170170 0 0 1
## 917 28.90348847 0 0 1
## 918 1.46056394 1 0 0
## 919 11.46124758 0 1 0
## 920 33.79411929 0 0 1
## 921 -4.47763616 0 1 0
## 922 13.91404120 0 1 0
## 923 1.87595663 0 1 0
## 924 13.42952139 0 1 0
## 925 15.69679710 0 1 0
## 926 32.21787014 0 0 1
## 927 10.20769949 0 1 0
## 928 12.19549525 0 1 0
## 929 51.96506237 0 0 1
## 930 9.87125750 0 1 0
## 931 10.63741380 0 1 0
## 932 10.55482689 1 0 0
## 933 13.44024096 0 1 0
## 934 1.99886304 0 1 0
## 935 13.10178123 0 1 0
## 936 -0.49797287 0 1 0
## 937 2.92906798 0 1 0
## 938 11.84695675 0 1 0
## 939 -2.83322544 0 1 0
## 940 17.66535106 0 0 1
## 941 28.20130463 0 0 1
## 942 3.43498139 0 1 0
## 943 11.25814633 1 0 0
## 944 4.54204507 0 1 0
## 945 9.10542824 0 1 0
## 946 14.92772099 0 1 0
## 947 2.76022765 0 1 0
## 948 36.61079310 0 0 1
## 949 5.96876271 0 1 0
## 950 18.78984915 1 0 0
## 951 9.56050594 0 1 0
## 952 7.35333371 1 0 0
## 953 9.61863177 0 1 0
## 954 10.82386970 0 1 0
## 955 20.11780189 0 1 0
## 956 3.22198822 1 0 0
## 957 12.01251226 0 1 0
## 958 14.65754922 0 1 0
## 959 2.53116135 1 0 0
## 960 30.17641642 0 1 0
## 961 10.97584087 0 1 0
## 962 54.31816741 0 0 1
## 963 9.56933193 0 1 0
## 964 17.16601852 0 1 0
## 965 0.80478609 0 1 0
## 966 15.35313494 0 1 0
## 967 9.84492893 1 0 0
## 968 -1.34710439 1 0 0
## 969 5.55610575 0 1 0
## 970 7.33200309 0 1 0
## 971 16.16605761 0 1 0
## 972 2.65193888 1 0 0
## 973 3.62807334 1 0 0
## 974 12.68466446 1 0 0
## 975 15.16866201 0 1 0
## 976 9.57109073 1 0 0
## 977 7.76539700 0 1 0
## 978 5.12905739 0 1 0
## 979 4.39118554 0 1 0
## 980 15.15203895 0 1 0
## 981 45.62522524 0 0 1
## 982 11.92286227 1 0 0
## 983 4.82725817 0 1 0
## 984 -1.90539309 0 1 0
## 985 34.20031783 0 0 1
## 986 2.57733366 0 1 0
## 987 9.96522218 0 1 0
## 988 26.75101596 0 1 0
## 989 3.64104722 1 0 0
## 990 39.20800411 0 0 1
## 991 11.29036560 0 1 0
## 992 9.49566282 1 0 0
## 993 15.49054149 0 1 0
## 994 4.74618497 0 1 0
## 995 6.40012227 0 1 0
## 996 9.68770335 0 1 0
## 997 23.27796586 0 0 1
## 998 6.05484776 1 0 0
## 999 9.32273117 0 1 0
## 1000 5.89033473 0 1 0
step.mod <- lm(y~c1+c2+c3)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70))
lines(data.x,lin.mod$fitted.values,col="purple")
lines(data.x[ix], step.mod$fitted.values[ix],col="red")
summary(step.mod)
##
## Call:
## lm(formula = y ~ c1 + c2 + c3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.493 -4.885 -0.375 4.734 35.863
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.0026 0.5833 53.15 <2e-16 ***
## c11 -25.7742 0.8149 -31.63 <2e-16 ***
## c21 -19.8529 0.6474 -30.67 <2e-16 ***
## c31 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.309 on 997 degrees of freedom
## Multiple R-squared: 0.5422, Adjusted R-squared: 0.5413
## F-statistic: 590.4 on 2 and 997 DF, p-value: < 2.2e-16
Perbandingan Model
nilai_AIC <- rbind(AIC(lin.mod),
AIC(pol.mod),
AIC(step.mod))
nama_model <- c("Linear","Poly (ordo=2)","Tangga (breaks=3)")
data.frame(nama_model,nilai_AIC)
## nama_model nilai_AIC
## 1 Linear 6568.395
## 2 Poly (ordo=2) 6075.346
## 3 Tangga (breaks=3) 6821.116
MSE= function(pred,actual)
{
mean((pred-actual)^2)
}
MSE(predict(lin.mod), y)
## [1] 41.45124
MSE(predict(pol.mod), y)
## [1] 25.26623
MSE(predict(step.mod), y)
## [1] 53.26283
Berdasarkan nilai AIC dan MSE terkecil, model regresi polinomial dengan ordo 2 merupakan model terbaik dibandingkan model lainnya.
Pertemuan 9
Piecewise Cubic Polynomial
dt.all <- cbind(y,data.x)
#Knots=1
##Jumlah amatan data yang >=1
dt1 <- dt.all[data.x <=1,]
dim(dt1)
## [1] 495 2
##Jumlah amatan data yang >1
dt2 <- dt.all[data.x >1,]
dim(dt2)
## [1] 505 2
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="orange")
cub.mod1 <- lm(dt1[,1]~dt1[,2]+I(dt1[,2]^2)+I(dt1[,2]^3))
ix <- sort(dt1[,2], index.return=T)$ix
lines(dt1[ix,2],cub.mod1$fitted.values[ix],col="blue", lwd=2)
cub.mod2 <- lm(dt2[,1]~dt2[,2]+I(dt2[,2]^2)+I(dt2[,2]^3))
ix <- sort(dt2[,2], index.return=T)$ix
lines(dt2[ix,2],cub.mod2$fitted.values[ix],col="purple", lwd=2)
Truncated Power Basic
plot(data.x,y,xlim=c( -2,5), ylim=c( -10,70), pch=16,col="orange")
abline(v=1,col="red", lty=2)
hx <- ifelse( data.x>1,(data.x-1)^3,0)
cubspline.mod <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix], cubspline.mod$fitted.values[ix],col="purple", lwd=2)
Perbandingan dengan k-fold CV
plot( data.x,y,xlim=c(-2,5), ylim=c( -10,70), pch=16,col="orange")
abline(v=0,col="red", lty=2)
abline(v=2,col="red", lty=2)
hx1 <- ifelse( data.x>0,(data.x-0)^3,0)
hx2 <- ifelse( data.x>2,(data.x-2)^3,0)
cubspline.mod2 <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx1+hx2)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix],cubspline.mod2$fitted.values[ix],col="blue", lwd=2)
Perbandingan 1 Knots dan 2 Knots dengan 5-fold CV
##1 knots
data.all <- cbind( y,data.x,hx)
set.seed(456)
ind <- sample(1:5,length( data.x),replace=T)
res <- c()
for( i in 1:5){
dt.train <- data.all[ ind!=i,]
x.test <- data.all[ ind==i, -1]
y.test <- data.all[ ind==i,1]
mod1 <- lm( dt.train[,1]~dt.train[,2]+I( dt.train[,2]^2)+I( dt.train[,2]^3)+dt.train[,3])
x.test.olah <- cbind(1,x.test[,1], x.test[,1]^2,x.test[,1]^3,x.test[,2])
beta <- coefficients(mod1)
prediksi <- x.test.olah%*%beta
res <- c( res,mean(( y.test-prediksi)^2))
}
res
## [1] 22.08767 21.39598 29.47677 29.68683 25.18070
mean(res)
## [1] 25.56559
##2 knots (knots = 0,2)
data.all2 <- cbind(y,data.x,hx1,hx2)
set.seed(456)
ind2 <- sample(1:5,length( data.x),replace=T)
res2 <- c()
for( i in 1:5){
dt.train2 <- data.all2[ind2!=i,]
x.test2 <- data.all2[ind2==i,-1]
y.test2 <- data.all2[ind2==i,1]
mod2 <- lm(dt.train2[,1]~dt.train2[,2]+I(dt.train2[,2]^2)+I(dt.train2[,2]^3)+dt.train2[,3]+dt.train2[,4])
x.test.olah2 <- cbind(1,x.test2[,1],x.test2[,1]^2,x.test2[,1]^3,x.test2[,2],x.test2[,3])
beta2 <- coefficients(mod2)
prediksi2 <- x.test.olah2%*%beta2
res2 <- c(res2,mean((y.test2-prediksi2)^2))
}
res2
## [1] 22.32898 21.33868 29.40459 29.79913 25.22047
mean(res2)
## [1] 25.61837
Smoothing Spline
ss1 <- smooth.spline(data.x,y,all.knots = T)
ss2 <- smooth.spline(data.x,y,all.knots = F)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16,col="orange")
lines(ss1,col="purple", lwd=2)
lines(ss2,col="green", lwd=2)
Fungsi all.knots = T menghasilkan visualisasi yang lebih mempertimbahnkan semua amatan x dimana menyesuaikan pola data dan residualnya, sehingga hasilnya lebih fitted dibandingkan fungsi Fungsi all.knots = F hasilnya lebih mulus.
Contoh Responsi
Data TRICEPS
Data yang digunakan untuk ilustrasi berasal dari studi antropometri terhadap 892 perempuan di bawah 50 tahun di tiga desa Gambia di Afrika Barat. Data terdiri dari 3 Kolom yaitu Age, Intriceps, dan Tricepts. Berikut adalah penjelasannya pada masing-masing kolom:
- Age          : umur responden
- Intriceps  : logaritma dari ketebalan lipatan kulit triceps
- Triceps     : ketebalan lipatan kulit triceps
Lipatan kulit trisep diperlukan untuk menghitung lingkar otot lengan atas. Ketebalannya memberikan informasi tentang cadangan lemak tubuh, sedangkan massa otot yang dihitung memberikan informasi tentang cadangan protein.
x : age
y : triceps
data("triceps", package="MultiKink")
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="blue") +
theme_bw()
Berdasarkan pola hubungan yang terlihat pada scatterplot, kita akan mencoba untuk mencari model yang bisa merepresentasikan pola hubungan tersebut dengan baik.
Pertemuan 8
Regresi Linear
mod_linear <- lm(triceps~age,data=triceps)
summary(mod_linear)
##
## Call:
## lm(formula = triceps ~ age, data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9512 -2.3965 -0.5154 1.5822 25.1233
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.19717 0.21244 29.17 <2e-16 ***
## age 0.21584 0.01014 21.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.007 on 890 degrees of freedom
## Multiple R-squared: 0.3372, Adjusted R-squared: 0.3365
## F-statistic: 452.8 on 1 and 890 DF, p-value: < 2.2e-16
Ringkasan Nilai R-squared dan AIC
ringkasan_linear <- summary(mod_linear)
ringkasan_linear$r.squared
## [1] 0.3372092
AIC(mod_linear)
## [1] 5011.515
tabel_data <- rbind(data.frame("Y_Aktual" = triceps$triceps,
"Y_Prediksi" = predict(mod_linear),
"Residuals" = residuals(mod_linear)))
kableExtra::kable(head(tabel_data) ,caption = 'Ringkasan Nilai R-Squared dan AIC')
| Y_Aktual | Y_Prediksi | Residuals |
|---|---|---|
| 3.4 | 8.798074 | -5.398074 |
| 4.0 | 8.336171 | -4.336171 |
| 4.2 | 8.364231 | -4.164231 |
| 4.2 | 8.677202 | -4.477202 |
| 4.4 | 8.381498 | -3.981498 |
| 4.4 | 8.759222 | -4.359222 |
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~x,lty = 1,
col = "purple",se = F)+
theme_bw()
Regresi Polinomial Derajat 2 (Ordo 2)
mod_polinomial2 <- lm(triceps ~ poly(age,2,raw = T),
data=triceps)
summary(mod_polinomial2)
##
## Call:
## lm(formula = triceps ~ poly(age, 2, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5677 -2.4401 -0.4587 1.6368 24.9961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0229191 0.3063806 19.658 < 2e-16 ***
## poly(age, 2, raw = T)1 0.2434733 0.0364403 6.681 4.17e-11 ***
## poly(age, 2, raw = T)2 -0.0006257 0.0007926 -0.789 0.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.008 on 889 degrees of freedom
## Multiple R-squared: 0.3377, Adjusted R-squared: 0.3362
## F-statistic: 226.6 on 2 and 889 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "purple",se = F)+
theme_bw()
Scatter Plot dengan Standard Error
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "purple",se = T)+
theme_bw()
Regresi Polinomial Derajat 3 (Ordo 3)
mod_polinomial3 = lm(triceps ~ poly(age,3,raw = T),data=triceps)
summary(mod_polinomial3)
##
## Call:
## lm(formula = triceps ~ poly(age, 3, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5832 -1.9284 -0.5415 1.3283 24.4440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.004e+00 3.831e-01 20.893 < 2e-16 ***
## poly(age, 3, raw = T)1 -3.157e-01 7.721e-02 -4.089 4.73e-05 ***
## poly(age, 3, raw = T)2 3.101e-02 3.964e-03 7.824 1.45e-14 ***
## poly(age, 3, raw = T)3 -4.566e-04 5.612e-05 -8.135 1.38e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.868 on 888 degrees of freedom
## Multiple R-squared: 0.3836, Adjusted R-squared: 0.3815
## F-statistic: 184.2 on 3 and 888 DF, p-value: < 2.2e-16
Pada model ini, peubah semua \(X^2\) signifikan dan nilai R-squared naik dari model sebelumnya.
Scatter Plot dengan Standard Error
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "purple",se = T)+
theme_bw()
Perbandingan Model Liniear & Polinomial
AIC(mod_linear)
## [1] 5011.515
AIC(mod_polinomial2)
## [1] 5012.89
AIC(mod_polinomial3)
## [1] 4950.774
MSE(predict(mod_linear),triceps$triceps)
## [1] 16.01758
MSE(predict(mod_polinomial2),triceps$triceps)
## [1] 16.00636
MSE(predict(mod_polinomial3),triceps$triceps)
## [1] 14.89621
Regresi Fungsi Tangga (5)
mod_tangga5 <- lm(triceps ~ cut(age,5),data=triceps)
summary(mod_tangga5)
##
## Call:
## lm(formula = triceps ~ cut(age, 5), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5474 -2.0318 -0.4465 1.3682 23.3759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.2318 0.1994 36.260 < 2e-16 ***
## cut(age, 5)(10.6,20.9] 1.6294 0.3244 5.023 6.16e-07 ***
## cut(age, 5)(20.9,31.2] 5.9923 0.4222 14.192 < 2e-16 ***
## cut(age, 5)(31.2,41.5] 7.5156 0.4506 16.678 < 2e-16 ***
## cut(age, 5)(41.5,51.8] 7.4561 0.5543 13.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.939 on 887 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.3588
## F-statistic: 125.7 on 4 and 887 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "purple",se = F)+
theme_bw()
Regresi Fungsi Tangga (7)
mod_tangga7 <- lm(triceps ~ cut(age,7),data=triceps)
summary(mod_tangga7)
##
## Call:
## lm(formula = triceps ~ cut(age, 7), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.8063 -1.7592 -0.4366 1.2894 23.1461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.5592 0.2219 34.060 < 2e-16 ***
## cut(age, 7)(7.62,15] -0.6486 0.3326 -1.950 0.0515 .
## cut(age, 7)(15,22.3] 3.4534 0.4239 8.146 1.27e-15 ***
## cut(age, 7)(22.3,29.7] 5.8947 0.4604 12.804 < 2e-16 ***
## cut(age, 7)(29.7,37] 7.8471 0.5249 14.949 < 2e-16 ***
## cut(age, 7)(37,44.4] 6.9191 0.5391 12.835 < 2e-16 ***
## cut(age, 7)(44.4,51.8] 6.3013 0.6560 9.606 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.805 on 885 degrees of freedom
## Multiple R-squared: 0.4055, Adjusted R-squared: 0.4014
## F-statistic: 100.6 on 6 and 885 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,7),
lty = 1, col = "purple",se = F)+
theme_bw()
Akurasi Model
MSE <- function(pred,actual){
mean((pred-actual)^2)
}
Nilai_MSE <- rbind(MSE(predict(mod_linear),triceps$triceps),
MSE(predict(mod_polinomial2),triceps$triceps),
MSE(predict(mod_polinomial3),triceps$triceps),
MSE(predict(mod_tangga5),triceps$triceps),
MSE(predict(mod_tangga7),triceps$triceps))
Nama_Model <- c("Linear","Poly (ordo=2)", "Poly (ordo=3)",
"Tangga (breaks=5)", "Tangga (breaks=7)")
ringkasan <- data.frame(Nama_Model, Nilai_MSE)
kableExtra::kable(head(ringkasan) ,caption = 'Perbandingan Akurasi Ketiga Model')
| Nama_Model | Nilai_MSE |
|---|---|
| Linear | 16.01758 |
| Poly (ordo=2) | 16.00636 |
| Poly (ordo=3) | 14.89621 |
| Tangga (breaks=5) | 15.42602 |
| Tangga (breaks=7) | 14.36779 |
Berdasarkan nilai MSE terkecil, model regresi fungsi tangga dengan breaks sebesar 7 merupakan model terbaik dibandingkan model lainnya.
Evaluasi Model dengan Cross Validation
Regresi Linear
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps") #Biasanya menggunakan v = 5 atau 10 (batas wajar)
metric_linear <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ age,data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae") #mae diabsolutkan
return(metric)
}
)
metric_linear
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 3.65 2.82
## 2 4.62 3.22
## 3 4.38 3.00
## 4 3.85 2.80
## 5 3.08 2.36
## 6 3.83 2.81
## 7 3.59 2.78
## 8 4.66 3.06
## 9 3.50 2.56
## 10 4.59 2.93
#Menghitung rata-rata untuk 10 folds
mean_metric_linear <- colMeans(metric_linear)
mean_metric_linear
## rmse mae
## 3.973249 2.833886
Regresi Polinomial Derajat 2
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_poly2 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ poly(age,2,raw = T),data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly2
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 3.64 2.82
## 2 4.62 3.26
## 3 4.39 3.02
## 4 3.85 2.81
## 5 3.10 2.38
## 6 3.82 2.81
## 7 3.62 2.83
## 8 4.65 3.07
## 9 3.50 2.57
## 10 4.59 2.94
#Menghitung rata-rata untuk 10 folds
mean_metric_poly2 <- colMeans(metric_poly2)
mean_metric_poly2
## rmse mae
## 3.977777 2.851787
Regresi Polinomial Derajat 3
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_poly3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ poly(age,3,raw = T),data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly3
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 3.49 2.60
## 2 4.48 2.99
## 3 4.21 2.85
## 4 4.02 2.75
## 5 3.03 2.09
## 6 3.63 2.59
## 7 3.53 2.52
## 8 4.54 2.91
## 9 3.27 2.33
## 10 4.27 2.68
#Menghitung rata-rata untuk 10 folds
mean_metric_poly3 <- colMeans(metric_poly3)
mean_metric_poly3
## rmse mae
## 3.845976 2.632125
Regresi Fungsi Tangga
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
breaks <- 3:10 #titik cut data
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- triceps[x$in_id,]
training$age <- cut(training$age,i)
mod <- lm(triceps ~ age,data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- triceps[-x$in_id,]
age_new <- cut(testing$age,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,newdata=list(age=age_new))
truth <- testing$triceps
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,response = data_eval$pred)
mae <- mlr3measures::mae(truth = data_eval$truth,response = data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
#Menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
}
)
best_tangga <- cbind(breaks=breaks,best_tangga)
#Menampilkan hasil all breaks
best_tangga
## breaks rmse mae
## 1 3 3.835357 2.618775
## 2 4 3.882932 2.651911
## 3 5 3.917840 2.724368
## 4 6 3.836068 2.622939
## 5 7 3.789715 2.555062
## 6 8 3.812789 2.555563
## 7 9 3.781720 2.518706
## 8 10 3.795877 2.529479
#Berdasarkan rmse terkecil
best_tangga %>% slice_min(rmse)
## breaks rmse mae
## 1 9 3.78172 2.518706
#Berdasarkan mae terkecil
best_tangga %>% slice_min(mae)
## breaks rmse mae
## 1 9 3.78172 2.518706
Perbandingan Model
nilai_metric <- rbind(mean_metric_linear,
mean_metric_poly2,
mean_metric_poly3,
best_tangga %>% dplyr::select(-1) %>% slice_min(mae))
nama_model <- c("Linear","Poly2","Poly3","Tangga (breaks=9)")
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Linear 3.973249 2.833886
## 2 Poly2 3.977777 2.851787
## 3 Poly3 3.845976 2.632125
## 4 Tangga (breaks=9) 3.781720 2.518706
Berdasarkan nilai rmse dan mae terkecil, model regresi fungsi tangga dengan breaks sebesar 9 merupakan model terbaik dibandingkan model lainnya.
Pertemuan 9
Regresi Spline
#Menentukan banyaknya fungsi basis dan knots
dim(bs(triceps$age, knots = c(10, 20,40)))
## [1] 892 6
#atau
dim(bs(triceps$age, df=6))
## [1] 892 6
#Nilai knots yang ditentukan oleh komputer
attr(bs(triceps$age, df=6),"knots")
## 25% 50% 75%
## 5.5475 12.2100 24.7275
b-spline
Knots Manual
knot_value_manual_3 = c(10, 20,40)
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_manual_3 ),data=triceps)
summary(mod_spline_1)
##
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_manual_3),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.385 -1.682 -0.393 1.165 22.855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.22533 0.61873 13.294 < 2e-16 ***
## bs(age, knots = knot_value_manual_3)1 -0.06551 1.14972 -0.057 0.955
## bs(age, knots = knot_value_manual_3)2 -4.30051 0.72301 -5.948 3.90e-09 ***
## bs(age, knots = knot_value_manual_3)3 7.80435 1.17793 6.625 6.00e-11 ***
## bs(age, knots = knot_value_manual_3)4 6.14890 1.27439 4.825 1.65e-06 ***
## bs(age, knots = knot_value_manual_3)5 5.56640 1.42225 3.914 9.78e-05 ***
## bs(age, knots = knot_value_manual_3)6 7.90178 1.54514 5.114 3.87e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.76 on 885 degrees of freedom
## Multiple R-squared: 0.4195, Adjusted R-squared: 0.4155
## F-statistic: 106.6 on 6 and 885 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = knot_value_manual_3),
lty = 1,se = F)
Knots Fungsi
knot_value_pc_df_6 = attr(bs(triceps$age, df=6),"knots")
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_pc_df_6 ),data=triceps)
summary(mod_spline_1)
##
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_pc_df_6), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0056 -1.7556 -0.2944 1.2008 23.0695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5925 0.8770 7.517 1.37e-13 ***
## bs(age, knots = knot_value_pc_df_6)1 3.7961 1.5015 2.528 0.01164 *
## bs(age, knots = knot_value_pc_df_6)2 -2.0749 0.8884 -2.335 0.01974 *
## bs(age, knots = knot_value_pc_df_6)3 1.5139 1.1645 1.300 0.19391
## bs(age, knots = knot_value_pc_df_6)4 11.6394 1.3144 8.855 < 2e-16 ***
## bs(age, knots = knot_value_pc_df_6)5 5.9680 1.5602 3.825 0.00014 ***
## bs(age, knots = knot_value_pc_df_6)6 8.9127 1.4053 6.342 3.60e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.757 on 885 degrees of freedom
## Multiple R-squared: 0.4206, Adjusted R-squared: 0.4167
## F-statistic: 107.1 on 6 and 885 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = knot_value_pc_df_6),
lty = 1,se = F)
Natural Spline
knot_value_manual_3 = c(10, 20,40)
mod_spline3ns = lm(triceps ~ ns(age, knots = knot_value_manual_3),data=triceps)
summary(mod_spline3ns)
##
## Call:
## lm(formula = triceps ~ ns(age, knots = knot_value_manual_3),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7220 -1.7640 -0.3985 1.1908 22.9684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.8748 0.3742 23.714 < 2e-16 ***
## ns(age, knots = knot_value_manual_3)1 7.0119 0.6728 10.422 < 2e-16 ***
## ns(age, knots = knot_value_manual_3)2 6.0762 0.8625 7.045 3.72e-12 ***
## ns(age, knots = knot_value_manual_3)3 2.0780 1.0632 1.954 0.051 .
## ns(age, knots = knot_value_manual_3)4 8.8616 0.9930 8.924 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.762 on 887 degrees of freedom
## Multiple R-squared: 0.4176, Adjusted R-squared: 0.415
## F-statistic: 159 on 4 and 887 DF, p-value: < 2.2e-16
Scatter Plot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = knot_value_manual_3),
lty = 1,se=F)
Smoothing Spline
Fungsi smooth.spline() dapat digunakan untuk melakukan smoothing spline pada R
model_sms <- with(data = triceps,smooth.spline(age,triceps))
model_sms
## Call:
## smooth.spline(x = age, y = triceps)
##
## Smoothing Parameter spar= 0.5162704 lambda= 1.232259e-06 (13 iterations)
## Equivalent Degrees of Freedom (Df): 50.00639
## Penalized Criterion (RSS): 8591.581
## GCV: 13.77835
Fungsi smooth.spline secara otomatis memilih paramter lambda terbaik dengan menggunakan Cross-Validation (CV) dan Generalized Cross-Validation (GCV). Perbedaan utama antara keduanya adalah pada ukuran kebaikan model yang digunakan. Kalau CV menggunakan MSE sebagai ukuran kebaikan model, sedangkan GCV menggunakan Weighted-MSE. Banyaknya folds (lipatan) pada CV dan GCV ini adalah sejumlah observasinya.
Scatter Plot
pred_data <- broom::augment(model_sms)
ggplot(pred_data,aes(x=x,y=y))+
geom_point(alpha=0.55, color="black")+
geom_line(aes(y=.fitted),col="blue",
lty=1)+
xlab("age")+
ylab("triceps")+
theme_bw()
Pengaruh parameter lambda terhadap tingkat smoothness kurva bisa dilihat dari ilustrasi berikut:
model_sms_lambda <- data.frame(lambda=seq(0,5,by=0.5)) %>%
group_by(lambda) %>%
do(broom::augment(with(data = triceps,smooth.spline(age,triceps,lambda = .$lambda))))
p <- ggplot(model_sms_lambda,
aes(x=x,y=y))+
geom_line(aes(y=.fitted),
col="blue",
lty=1
)+
facet_wrap(~lambda)
p
Jika kita tentukan df=7, maka hasil kurva model smooth.spline akan lebih merepresentasikan data.
model_sms <- with(data = triceps,smooth.spline(age,triceps,df=7))
model_sms
## Call:
## smooth.spline(x = age, y = triceps, df = 7)
##
## Smoothing Parameter spar= 1.049874 lambda= 0.008827582 (11 iterations)
## Equivalent Degrees of Freedom (Df): 7.000747
## Penalized Criterion (RSS): 10112.16
## GCV: 14.20355
Scatter Plot
pred_data <- broom::augment(model_sms)
ggplot(pred_data,aes(x=x,y=y))+
geom_point(alpha=0.55, color="black")+
geom_line(aes(y=.fitted),col="blue",
lty=1)+
xlab("age")+
ylab("triceps")+
theme_bw()
LOESS
Masih menggunakan data triceps seperti pada ilustrasi sebelumnya, kali ini kita akan mencoba melakukan pendekatan local regression dengan fungsi loess().
model_loess <- loess(triceps~age,
data = triceps)
summary(model_loess)
## Call:
## loess(formula = triceps ~ age, data = triceps)
##
## Number of Observations: 892
## Equivalent Number of Parameters: 4.6
## Residual Standard Error: 3.777
## Trace of smoother matrix: 5.01 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
Pengaruh parameter span terhadap tingkat smoothness kurva bisa dilihat dari ilustrasi berikut:
model_loess_span <- data.frame(span=seq(0.1,5,by=0.5)) %>%
group_by(span) %>%
do(broom::augment(loess(triceps~age,
data = triceps,span=.$span)))
p2 <- ggplot(model_loess_span,
aes(x=age,y=triceps))+
geom_line(aes(y=.fitted),
col="blue",
lty=1
)+
facet_wrap(~span)
p2
Pendekatan ini juga dapat dilakukan dengan fungsi stat_smooth() pada package ggplot2.
ggplot(triceps, aes(age,triceps)) +
geom_point(alpha=0.5,color="black") +
stat_smooth(method='loess',
formula=y~x,
span = 0.75,
col="blue",
lty=1,
se=F)
Tuning span dapat dilakukan dengan menggunakan cross-validation:
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
span <- seq(0.1,1,length.out=50)
best_loess <- map_dfr(span, function(i){
metric_loess <- map_dfr(cross_val$splits,
function(x){
mod <- loess(triceps ~ age,span = i,
data=triceps[x$in_id,])
pred <- predict(mod,
newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
data_eval <- na.omit(data.frame(pred=pred,
truth=truth))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
head(metric_loess,20)
#Menghitung rata-rata untuk 10 folds
mean_metric_loess <- colMeans(metric_loess)
mean_metric_loess
}
)
best_loess <- cbind(span=span,best_loess)
#Menampilkan hasil all breaks
best_loess
## span rmse mae
## 1 0.1000000 3.773822 2.495752
## 2 0.1183673 3.771342 2.493506
## 3 0.1367347 3.766450 2.487557
## 4 0.1551020 3.757385 2.480571
## 5 0.1734694 3.749539 2.473494
## 6 0.1918367 3.746288 2.470915
## 7 0.2102041 3.742741 2.467990
## 8 0.2285714 3.740397 2.465388
## 9 0.2469388 3.737724 2.463311
## 10 0.2653061 3.737105 2.462252
## 11 0.2836735 3.735645 2.460514
## 12 0.3020408 3.734522 2.459225
## 13 0.3204082 3.733298 2.457694
## 14 0.3387755 3.732538 2.456280
## 15 0.3571429 3.732082 2.455452
## 16 0.3755102 3.731261 2.454642
## 17 0.3938776 3.730882 2.454293
## 18 0.4122449 3.730585 2.454291
## 19 0.4306122 3.730351 2.454869
## 20 0.4489796 3.730717 2.456222
## 21 0.4673469 3.730831 2.456768
## 22 0.4857143 3.731387 2.458307
## 23 0.5040816 3.732091 2.460189
## 24 0.5224490 3.732392 2.461811
## 25 0.5408163 3.733609 2.464543
## 26 0.5591837 3.734393 2.467180
## 27 0.5775510 3.735554 2.470517
## 28 0.5959184 3.736679 2.473012
## 29 0.6142857 3.738227 2.476519
## 30 0.6326531 3.741401 2.476887
## 31 0.6510204 3.742660 2.479842
## 32 0.6693878 3.744336 2.483581
## 33 0.6877551 3.746651 2.488792
## 34 0.7061224 3.748708 2.492180
## 35 0.7244898 3.750734 2.495218
## 36 0.7428571 3.752815 2.498359
## 37 0.7612245 3.753998 2.504175
## 38 0.7795918 3.755981 2.511421
## 39 0.7979592 3.756794 2.514375
## 40 0.8163265 3.759456 2.522994
## 41 0.8346939 3.767892 2.543250
## 42 0.8530612 3.777168 2.557863
## 43 0.8714286 3.782407 2.566972
## 44 0.8897959 3.788428 2.578042
## 45 0.9081633 3.797002 2.595698
## 46 0.9265306 3.803259 2.609852
## 47 0.9448980 3.809478 2.623386
## 48 0.9632653 3.824192 2.655691
## 49 0.9816327 3.835596 2.679155
## 50 1.0000000 3.860514 2.718911
#Berdasarkan nilai rmse terkecil
best_loess %>% slice_min(rmse)
## span rmse mae
## 1 0.4306122 3.730351 2.454869
#Berdasarkan nilai mae terkecil
best_loess %>% slice_min(mae)
## span rmse mae
## 1 0.4122449 3.730585 2.454291
Scatter Plot
ggplot(triceps, aes(age,triceps)) +
geom_point(alpha=0.5,color="black") +
stat_smooth(method='loess',
formula=y~x,
span = 0.4306122,
col="blue",
lty=1,
se=F)
Latihan: Data Auto
Lakukan analisis regresi nonlinier pada data berikut:
Pra-proccessing
autodt <- Auto %>% dplyr::select(mpg, horsepower, origin)
str(autodt)
## 'data.frame': 392 obs. of 3 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ horsepower: num 130 165 150 150 140 198 220 215 225 190 ...
## $ origin : num 1 1 1 1 1 1 1 1 1 1 ...
#Statistik dekriptif
summary(autodt)
## mpg horsepower origin
## Min. : 9.00 Min. : 46.0 Min. :1.000
## 1st Qu.:17.00 1st Qu.: 75.0 1st Qu.:1.000
## Median :22.75 Median : 93.5 Median :1.000
## Mean :23.45 Mean :104.5 Mean :1.577
## 3rd Qu.:29.00 3rd Qu.:126.0 3rd Qu.:2.000
## Max. :46.60 Max. :230.0 Max. :3.000
kableExtra::kable(head(autodt) ,caption = 'Tabulasi Data Auto')
| mpg | horsepower | origin |
|---|---|---|
| 18 | 130 | 1 |
| 15 | 165 | 1 |
| 18 | 150 | 1 |
| 16 | 150 | 1 |
| 17 | 140 | 1 |
| 15 | 198 | 1 |
Eksploratif
pairs(autodt, pch=19, lower.panel=NULL)
mpg <- autodt$mpg
horsepower <- autodt$horsepower
origin <- autodt$origin
clist <- c("lightblue", "orange", "purple")
plot(mpg, horsepower, pch=19, col=clist[origin])
Pemodelan
Model origin ~ mpg
Regresi Linear V1
lin.mod.1 <- lm(origin ~ mpg, data=autodt)
plot(mpg, origin, pch=19)
lines(mpg, lin.mod.1$fitted.values, col="purple")
summary(lin.mod.1)
##
## Call:
## lm(formula = origin ~ mpg, data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.48384 -0.40469 -0.08386 0.34740 1.74114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.208871 0.106520 1.961 0.0506 .
## mpg 0.058333 0.004311 13.531 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6654 on 390 degrees of freedom
## Multiple R-squared: 0.3195, Adjusted R-squared: 0.3177
## F-statistic: 183.1 on 1 and 390 DF, p-value: < 2.2e-16
Regresi Linear V2
lin.mod.2 <- lm(horsepower ~ mpg, data=autodt)
plot(mpg, horsepower, pch=19)
lines(mpg, lin.mod.2$fitted.values, col="purple")
summary(lin.mod.2)
##
## Call:
## lm(formula = horsepower ~ mpg, data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.892 -15.716 -2.094 13.108 96.947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 194.4756 3.8732 50.21 <2e-16 ***
## mpg -3.8389 0.1568 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.19 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
Regresi Polinomial
#Penentuan derajat polinomial terbaik dibatasi hingga derajat ke-17
df.list.pol <- data.frame("df" = c(rep(NA, 10)), "AIC1" = c(rep(NA, 10)), "MSE1" = c(rep(NA, 10)), "AIC2" = c(rep(NA, 10)), "MSE2" = c(rep(NA, 10)))
for(j in 2:3){
for(i in 2:17){
pol.mod <- lm(autodt[,j] ~ poly(mpg,i,raw = T), data=autodt)
AIC.pol <- AIC(pol.mod)
MSE.pol <- mean((predict(pol.mod)-autodt[,j])^2)
if(j == 3){
df.list.pol[i-1,1:3] <- c(i, AIC.pol, MSE.pol)}else{
df.list.pol[i-1, 4:5] <- c(AIC.pol, MSE.pol)
}
}
}
knitr::kable(df.list.pol)
| df | AIC1 | MSE1 | AIC2 | MSE2 |
|---|---|---|---|---|
| 2 | 797.6568 | 0.4389163 | 3485.217 | 416.7871 |
| 3 | 796.2795 | 0.4351511 | 3454.949 | 383.8523 |
| 4 | 797.3754 | 0.4341486 | 3456.813 | 383.7195 |
| 5 | 799.2506 | 0.4340104 | 3455.310 | 380.3060 |
| 6 | 800.0427 | 0.4326751 | 3455.135 | 378.2017 |
| 7 | 801.2607 | 0.4318129 | 3455.139 | 376.2801 |
| 8 | 802.3611 | 0.4308230 | 3454.849 | 374.0890 |
| 9 | 804.3474 | 0.4308080 | 3455.295 | 372.6086 |
| 10 | 803.5020 | 0.4276922 | 3456.768 | 372.1082 |
| 11 | 805.3653 | 0.4275430 | 3455.520 | 369.0375 |
| 12 | 805.3653 | 0.4275430 | 3455.520 | 369.0375 |
| 13 | 800.9718 | 0.4206264 | 3455.558 | 367.1956 |
| 14 | 800.9718 | 0.4206264 | 3455.558 | 367.1956 |
| 15 | 798.6931 | 0.4160602 | 3457.551 | 367.1889 |
| 16 | 798.6931 | 0.4160602 | 3457.551 | 367.1889 |
| 17 | 799.4754 | 0.4147698 | 3458.900 | 366.5791 |
Berdasarkan hasil looping yang tersedia pada tabel di atas, diketahui bahwa derajat bebas polinomial yang terpilih untuk korelasi model origin ~ mpg adalah tiga dan untuk korelasi model horsepower ~ mpg adalah depalan berdasarkan nilai AIC terkecil.
Scatter Plot Model origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,3, raw=T), col="purple", se=F)
Scatter Plot Model horsepower ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,8, raw=T), col="purple", se=F)
Regresi Fungsi Tangga
#Penentuan derajat fungsi tangga terbaik dibatasi hingga derajat ke-17
df.list.step <- data.frame("df" = c(rep(NA, 10)), "AIC1" = c(rep(NA, 10)), "MSE1" = c(rep(NA, 10)), "AIC2" = c(rep(NA, 10)), "MSE2" = c(rep(NA, 10)))
for(j in 2:3){
for(i in 3:17){
step.mod <- lm(autodt[,j] ~ cut(mpg,i), data=autodt)
AIC.step <- AIC(step.mod)
MSE.step <- mean((predict(step.mod)-autodt[,j])^2)
if(j == 3){
df.list.step[i-2,1:3] <- c(i, AIC.step, MSE.step)}else{
df.list.step[i-2, 4:5] <- c(AIC.step, MSE.step)
}
}
}
knitr::kable(df.list.step)
| df | AIC1 | MSE1 | AIC2 | MSE2 |
|---|---|---|---|---|
| 3 | 825.4751 | 0.4711959 | 3722.514 | 763.5092 |
| 4 | 816.3085 | 0.4579626 | 3605.549 | 563.6549 |
| 5 | 812.0036 | 0.4506557 | 3526.845 | 458.7770 |
| 6 | 812.9059 | 0.4493955 | 3554.364 | 489.6366 |
| 7 | 800.0531 | 0.4326866 | 3521.117 | 447.5318 |
| 8 | 809.4147 | 0.4408891 | 3551.701 | 481.3840 |
| 9 | 807.4994 | 0.4365074 | 3494.485 | 413.8924 |
| 10 | 805.2416 | 0.4317918 | 3478.747 | 395.5813 |
| 11 | 802.1242 | 0.4261916 | 3499.380 | 414.8383 |
| 12 | 792.2372 | 0.4134617 | 3496.674 | 409.8881 |
| 13 | 802.4971 | 0.4222663 | 3468.818 | 379.8288 |
| 14 | 800.3072 | 0.4177769 | 3463.291 | 372.6051 |
| 15 | 796.4998 | 0.4116333 | 3468.751 | 375.9084 |
| 16 | 803.3598 | 0.4167684 | 3490.100 | 394.9285 |
| 17 | 796.7535 | 0.4077180 | 3470.907 | 374.1436 |
Berdasarkan hasil looping yang tersedia pada tabel di atas, diketahui bahwa derajat bebas (breaks) fungsi tangga yang terpilih untuk korelasi model origin ~ mpg adalah dua belas dan untuk korelasi model horsepower ~ mpg adalah sepuluh berdasarkan nilai AIC terkecil.
Scatter Plot Model origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~cut(x,12), col="purple", se=F)
Scatter Plot Model horsepower ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~cut(x,10), col="purple", se=F)
Perbandingan Akurasi Model V1
pol.mod.1 <- lm(origin ~ poly(mpg,3,raw = T), data=autodt)
pol.mod.2 <- lm(horsepower ~ poly(mpg,8,raw = T), data=autodt)
step.mod.1 <- lm(origin ~ cut(mpg,12), data=autodt)
step.mod.2 <- lm(horsepower ~ cut(mpg,10), data=autodt)
model.auto1 <- data.frame("Model" = c("Linear (mpg~origin)", "Linear (mpg~horsepower)", "Polinomial(3) (mpg~origin)", "Polinomial(8) (mpg~horsepower)", "Fungsi Tangga(12) (mpg~origin)", "Fungsi Tangga(10) (mpg~horsepower)"), "AIC" = c(AIC(lin.mod.1), AIC(lin.mod.2), AIC(pol.mod.1), AIC(pol.mod.2), AIC(step.mod.1), AIC(step.mod.2)), "MSE" = c(MSE(predict(lin.mod.1),autodt$origin), MSE(predict(lin.mod.2),autodt$horsepower), MSE(predict(pol.mod.1),autodt$origin), MSE(predict(pol.mod.2),autodt$horsepower), MSE(predict(step.mod.1),autodt$origin), MSE(predict(step.mod.2),autodt$horsepower)))
kableExtra::kable(head(model.auto1) ,caption = 'Perbandingan Akurasi Model V1')
| Model | AIC | MSE |
|---|---|---|
| Linear (mpg~origin) | 797.0222 | 0.4404478 |
| Linear (mpg~horsepower) | 3614.3235 | 582.3256764 |
| Polinomial(3) (mpg~origin) | 796.2795 | 0.4351511 |
| Polinomial(8) (mpg~horsepower) | 3454.8494 | 374.0890447 |
| Fungsi Tangga(12) (mpg~origin) | 792.2372 | 0.4134617 |
| Fungsi Tangga(10) (mpg~horsepower) | 3478.7475 | 395.5813018 |
Berdasarkah perbandingan akurasi model, diketahui bahwa untuk korelasi model origin ~ mpg lebih baik menggunakan model regresi fungsi tangga dengan breaks 12. Sementara itu, untuk korelasi model horsepower ~ mpg lebih baik menggunakan model polinomial dengan derajat bebas 3. Kedua model terbaik ini masih bersifat sementara karena masih dapat dianalisis dengan metode pemulusan.
Regresi Spline
Penentuan banyaknya knots dengan menggunakan bantuan komputer.
knots.val.auto <- attr(bs(autodt$mpg, df=6), "knots")
knots.val.auto
## 25% 50% 75%
## 17.00 22.75 29.00
Regresi B-Spline
Regresi B-Spline origin ~ mpg
spline.mod.auto1 <- lm(origin ~ bs(mpg, knots=knots.val.auto), data=autodt)
summary(spline.mod.auto1)
##
## Call:
## lm(formula = origin ~ bs(mpg, knots = knots.val.auto), data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39803 -0.38041 -0.02476 0.33868 1.81900
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.04633 0.44800 2.336 0.0200 *
## bs(mpg, knots = knots.val.auto)1 -0.09890 0.64164 -0.154 0.8776
## bs(mpg, knots = knots.val.auto)2 -0.07956 0.42992 -0.185 0.8533
## bs(mpg, knots = knots.val.auto)3 0.57546 0.49774 1.156 0.2483
## bs(mpg, knots = knots.val.auto)4 1.14229 0.49623 2.302 0.0219 *
## bs(mpg, knots = knots.val.auto)5 1.53577 0.64019 2.399 0.0169 *
## bs(mpg, knots = knots.val.auto)6 1.30545 0.61946 2.107 0.0357 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6649 on 385 degrees of freedom
## Multiple R-squared: 0.3292, Adjusted R-squared: 0.3187
## F-statistic: 31.49 on 6 and 385 DF, p-value: < 2.2e-16
scatter Plot B-Spline origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), lty=1, se=F)
Regresi B-Spline horsepower ~ mpg
spline.mod.auto2 <- lm(horsepower ~ bs(mpg, knots=knots.val.auto), data=autodt)
summary(spline.mod.auto2)
##
## Call:
## lm(formula = horsepower ~ bs(mpg, knots = knots.val.auto), data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.043 -10.103 -0.818 8.075 95.778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 195.012 13.181 14.795 < 2e-16 ***
## bs(mpg, knots = knots.val.auto)1 2.792 18.879 0.148 0.882
## bs(mpg, knots = knots.val.auto)2 -80.488 12.649 -6.363 5.62e-10 ***
## bs(mpg, knots = knots.val.auto)3 -103.774 14.645 -7.086 6.62e-12 ***
## bs(mpg, knots = knots.val.auto)4 -126.115 14.600 -8.638 < 2e-16 ***
## bs(mpg, knots = knots.val.auto)5 -127.143 18.836 -6.750 5.45e-11 ***
## bs(mpg, knots = knots.val.auto)6 -141.413 18.226 -7.759 7.77e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.56 on 385 degrees of freedom
## Multiple R-squared: 0.7457, Adjusted R-squared: 0.7417
## F-statistic: 188.1 on 6 and 385 DF, p-value: < 2.2e-16
Scatter Plot B-Spline horsepower ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), lty=1, se=F)
Natural Spline origin ~ mpg
ns.spline.auto1 <- lm(origin ~ ns(mpg, knots=knots.val.auto), data=autodt)
summary(ns.spline.auto1)
##
## Call:
## lm(formula = origin ~ ns(mpg, knots = knots.val.auto), data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.36121 -0.36897 -0.03639 0.34308 1.82256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9776 0.2295 4.261 2.56e-05 ***
## ns(mpg, knots = knots.val.auto)1 0.6192 0.2188 2.830 0.00489 **
## ns(mpg, knots = knots.val.auto)2 1.2993 0.2091 6.215 1.33e-09 ***
## ns(mpg, knots = knots.val.auto)3 1.4244 0.5348 2.663 0.00806 **
## ns(mpg, knots = knots.val.auto)4 1.5339 0.2913 5.265 2.33e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6633 on 387 degrees of freedom
## Multiple R-squared: 0.3288, Adjusted R-squared: 0.3218
## F-statistic: 47.39 on 4 and 387 DF, p-value: < 2.2e-16
Scatter Plot Natural Spline origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), lty=1,se=F)
Natural Spline horsepower ~ mpg
ns.spline.auto2 <- lm(horsepower ~ ns(mpg, knots=knots.val.auto), data=autodt)
summary(ns.spline.auto2)
##
## Call:
## lm(formula = horsepower ~ ns(mpg, knots = knots.val.auto), data = autodt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -72.461 -10.731 -1.433 8.759 95.650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 218.000 6.786 32.13 <2e-16 ***
## ns(mpg, knots = knots.val.auto)1 -130.511 6.470 -20.17 <2e-16 ***
## ns(mpg, knots = knots.val.auto)2 -109.368 6.183 -17.69 <2e-16 ***
## ns(mpg, knots = knots.val.auto)3 -237.134 15.815 -14.99 <2e-16 ***
## ns(mpg, knots = knots.val.auto)4 -115.116 8.615 -13.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.62 on 387 degrees of freedom
## Multiple R-squared: 0.7429, Adjusted R-squared: 0.7403
## F-statistic: 279.6 on 4 and 387 DF, p-value: < 2.2e-16
Scatter PLot Natural Spline origin ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), lty=1,se=F)
Smoothing Spline
Smoothing Spline origin ~ mpg
ss.auto1 <- with(data=autodt, smooth.spline(mpg, origin))
ss.auto1
## Call:
## smooth.spline(x = mpg, y = origin)
##
## Smoothing Parameter spar= 0.6891065 lambda= 0.0002244654 (11 iterations)
## Equivalent Degrees of Freedom (Df): 12.93154
## Penalized Criterion (RSS): 61.61513
## GCV: 0.4385944
Scatter Plot Smoothing Spline origin ~ mpg
pred.ss1 <- broom::augment(ss.auto1)
ggplot(pred.ss1, aes(x=x, y=y)) + geom_point(color="brown") + geom_line(aes(y=.fitted), col="orange", lty=1) + xlab("mpg") + ylab("origin")
Smoothing Spline horsepower ~ mpg
ss.auto2 <- with(data=autodt, smooth.spline(mpg, horsepower))
ss.auto2
## Call:
## smooth.spline(x = mpg, y = horsepower)
##
## Smoothing Parameter spar= 0.8528728 lambda= 0.003422279 (12 iterations)
## Equivalent Degrees of Freedom (Df): 6.989153
## Penalized Criterion (RSS): 35962.53
## GCV: 386.3516
Scatter Plot Smoothing Spline horsepower ~ mpg
pred.ss2 <- broom::augment(ss.auto2)
ggplot(pred.ss2, aes(x=x, y=y)) + geom_point(color="brown") + geom_line(aes(y=.fitted), col="orange", lty=1) + xlab("mpg") + ylab("horsepower")
LOESS Functin
LOESS Functin origin ~ mpg
loess.auto1 <- loess(origin~mpg, data=autodt)
summary(loess.auto1)
## Call:
## loess(formula = origin ~ mpg, data = autodt)
##
## Number of Observations: 392
## Equivalent Number of Parameters: 4.8
## Residual Standard Error: 0.6627
## Trace of smoother matrix: 5.24 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
Scatter Plot with Standard Erorr LOESS Functin origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="red") + stat_smooth(method="loess", formula=y~x, col="purple", show.legend=T)
LOESS Functin horsepower ~ mpg
loess.auto2 <- loess(horsepower~mpg, data=autodt)
summary(loess.auto2)
## Call:
## loess(formula = horsepower ~ mpg, data = autodt)
##
## Number of Observations: 392
## Equivalent Number of Parameters: 4.8
## Residual Standard Error: 19.58
## Trace of smoother matrix: 5.24 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
Scatter Plot with Standard Erorr LOESS Functin horsepower ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="red") + stat_smooth(method="loess", formula=y~x, col="purple", show.legend=T)
Eksploratif: Perbandingan Model V2 origin ~ mpg
ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,3,raw=T), col="blue", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~cut(x,12), col="purple", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), col="green", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), col="brown", lwd=1.2, se=F) + stat_smooth(method="loess", formula=y~x, col="orange", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~x, col="red", lwd=1.2, se=F)
Legend plot:
—–: Model regresi linear
—–: Model regresi polinomial
—–: Model regresi fungsi tangga
—–: Model regresi spline
—–: Model regresi natural spline
—–: Model regresi fungsi LOESS
Perbandingan Akurasi Model V2 origin ~ mpg
model.auto2a <- data.frame(Model = c("Linear", "Polinomial(3)", "Fungsi Tangga(12)", "Regresi Spline", "Natural Spline", "Fungsi LOESS"), MSE = c(MSE(predict(lin.mod.1), autodt$origin), MSE(predict(pol.mod.1), autodt$origin), MSE(predict(step.mod.1), autodt$origin), MSE(predict(spline.mod.auto1), autodt$origin), MSE(predict(ns.spline.auto1), autodt$origin), MSE(predict(loess.auto1), autodt$origin)))
kableExtra::kable(head(model.auto2a) ,caption = 'Perbandingan Akurasi Model V2 (origin ~ mpg)')
| Model | MSE |
|---|---|
| Linear | 0.4404478 |
| Polinomial(3) | 0.4351511 |
| Fungsi Tangga(12) | 0.4134617 |
| Regresi Spline | 0.4341514 |
| Natural Spline | 0.4344177 |
| Fungsi LOESS | 0.4328007 |
Berdasarkah perbandingan akurasi model dengan nilai MSE terkecil, diketahui bahwa model regresi fungsi tangga dengan breaks sebesar 12 merupakan model terbaik untuk korelasi model origin ~ mpg.
Eksploratif: Perbandingan Model V2 horsepower ~ mpg
ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), col="green", lwd=4, se=F) + stat_smooth(method="loess", formula=y~x, col="orange", lwd=2.5, se=F) + stat_smooth(method="lm", formula=y~cut(x,10), col="purple", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~poly(x,8,raw=T), col="blue", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), col="brown", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~x, col="red", lwd=1.2, se=F)
Legend plot:
—–: Model regresi linear
—–: Model regresi polinomial
—–: Model regresi fungsi tangga
—–: Model regresi spline
—–: Model regresi natural spline
—–: Model regresi fungsi LOESS
Perbandingan Akurasi Model V2 horsepower ~ mpg
model.auto2b <- data.frame(Model = c("Linear", "Polinomial(8)", "Fungsi Tangga(10)", "Regresi Spline", "Regresi Natural Spline", "LOESS Function"), MSE = c(MSE(predict(lin.mod.2), autodt$horsepower), MSE(predict(pol.mod.2), autodt$horsepower), MSE(predict(step.mod.2), autodt$horsepower), MSE(predict(spline.mod.auto2), autodt$horsepower), MSE(predict(ns.spline.auto2), autodt$horsepower), MSE(predict(loess.auto2), autodt$horsepower)))
kableExtra::kable(head(model.auto2a) ,caption = 'Perbandingan Akurasi Model V2 (horsepower ~ mpg)')
| Model | MSE |
|---|---|
| Linear | 0.4404478 |
| Polinomial(3) | 0.4351511 |
| Fungsi Tangga(12) | 0.4134617 |
| Regresi Spline | 0.4341514 |
| Natural Spline | 0.4344177 |
| Fungsi LOESS | 0.4328007 |
Berdasarkan nilai MSE terkecil pada perbandingan akurasi model di atas, model terbaik yang diperoleh adalah model regresi fungsi tangga dengan breaks 12.
Model with Cross-Validation Evaluation
Pra-proccessing
set.seed(174)
cross.val1 <- vfold_cv(autodt, v=10, strata="origin")
cross.val2 <- vfold_cv(autodt, v=10, strata="horsepower")
Model origin~mpg
Regresi Linear
lin.met1 <- map_dfr(cross.val1$splits, function(x){
mod <- lm(origin ~ mpg,data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$origin
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_lin.met1 <- colMeans(lin.met1)
m_lin.met1
## rmse mae
## 0.6631052 0.5147590
Regresi Polinomial Ordo 3
pol.met1 <- map_dfr(cross.val1$splits,
function(x){
mod <- lm(origin ~ poly(mpg,3,raw = T),data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$origin
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_pol.met1 <- colMeans(pol.met1)
m_pol.met1
## rmse mae
## 0.6623899 0.5033983
Regresi Fungsi Tangga
breaks <- 3:17
best.step1 <- map_dfr(breaks, function(i){
step.met1 <- map_dfr(cross.val1$splits,
function(x){
training <-autodt[x$in_id,]
training$mpg <- cut(training$mpg,i)
mod <- lm(origin ~ mpg, data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)), upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
testing <- autodt[-x$in_id,]
newdt <- cut(testing$mpg, c(labs_x_breaks[1,1], labs_x_breaks[,2]))
pred <- predict(mod, newdata=list(mpg=newdt))
truth <- testing$origin
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth, response = data_eval$pred)
mae <- mlr3measures::mae(truth = data_eval$truth, response = data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
step.met1
#Menghitung rata-rata untuk 10 folds
m_step.met1 <- colMeans(step.met1)
m_step.met1
}
)
best.step1 <- cbind(breaks=breaks, best.step1)
best.step1 %>% slice_min(rmse, n=5)
## breaks rmse mae
## 1 12 0.6588186 0.4904860
## 2 17 0.6613277 0.4861881
## 3 14 0.6622931 0.4954355
## 4 7 0.6638867 0.5061256
## 5 15 0.6657763 0.4867893
best.step1 %>% slice_min(mae, n=1)
## breaks rmse mae
## 1 17 0.6613277 0.4861881
Best step yang digunakan, yaitu breaks = 17 berdasarkan nilai mae terkecil.
Regresi Spline
spline.met1 <- map_dfr(cross.val1$splits,
function(x){
mod <- lm(origin ~ bs(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$origin
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
m_spline.met1 <- colMeans(spline.met1)
m_spline.met1
## rmse mae
## 0.6687020 0.4995128
Regresi Natural Spline
nspl.met1 <- map_dfr(cross.val1$splits,
function(x){
mod <- lm(origin ~ ns(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$origin
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_nspl.met1 <- colMeans(nspl.met1)
m_nspl.met1
## rmse mae
## 0.6641734 0.4962609
Regresi: LOESS
span <- seq(0.1, 1, length.out=91)
best.loess1 <- map_dfr(span, function(i){
loess.met1 <- map_dfr(cross.val1$splits, function(x){
mod <- loess(origin ~ mpg, span=i, data=autodt[x$in_id,])
pred <- predict(mod, newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$origin
data_eval <- na.omit(data.frame(pred=pred, truth=truth))
rmse <- mlr3measures::rmse(truth=data_eval$truth, response=data_eval$pred)
mae <- mlr3measures::mae(truth=data_eval$truth, response=data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
head(loess.met1,20)
#Menghitung rata-rata untuk 10 folds
m_loess.met1 <- colMeans(loess.met1)
m_loess.met1
}
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.7908e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.0291e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6179e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1236e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.4554e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6336e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1277e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
best.loess1 <- cbind(span=span,best.loess1)
#Menampilkan hasil all breaks
best.loess1 %>% slice_min(rmse, n=1)
## span rmse mae
## 1 0.28 0.6609029 0.4945192
best.loess1 %>% slice_min(mae, n=1)
## span rmse mae
## 1 0.11 0.6677179 0.4876461
Berdasarkan nilai rmse terkecil, best LOESS pada span 0.28, sedangkan best LOESS pada span 0.11 berdasarkan nilai mae terkecil.
Perbandingan Akurasi Model
df.cv1 <- rbind(m_lin.met1, m_pol.met1, best.step1[9,2:3], m_spline.met1, m_nspl.met1, best.loess1[35,-1])
rownames(df.cv1) <- c("Linear", "Polinomial(3)", "Fungsi Tangga(17)", "Spline", "Natural Spline", "Fungsi LOESS(0.11)")
kableExtra::kable(head(df.cv1) ,caption = 'Perbandingan Akurasi Model dengan *Cross Validation Evaluation* (origin ~ mpg)')
| rmse | mae | |
|---|---|---|
| Linear | 0.6631052 | 0.5147590 |
| Polinomial(3) | 0.6623899 | 0.5033983 |
| Fungsi Tangga(17) | 0.6725749 | 0.5016854 |
| Spline | 0.6687020 | 0.4995128 |
| Natural Spline | 0.6641734 | 0.4962609 |
| Fungsi LOESS(0.11) | 0.6622263 | 0.4951171 |
Berdasarkan nilai rmse dan mae terkecil, dengan menggunakan cross-validation evaluation 10 folds, model regresi fungsi LOESS dengan span sebesar 0.11 merupakan model terbaik untuk korelasi model origin ~ mpg dibandingkan model lainnya.
Model horsepower~mpg
Regresi Linier
lin.met2 <- map_dfr(cross.val2$splits, function(x){
mod <- lm(horsepower ~ mpg,data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$horsepower
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_lin.met2 <- colMeans(lin.met2)
m_lin.met2
## rmse mae
## 24.07495 18.46422
Regresi Polinomial Ordo 8
pol.met2 <- map_dfr(cross.val2$splits,
function(x){
mod <- lm(horsepower ~ poly(mpg,8,raw = T),data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$horsepower
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_pol.met2 <- colMeans(pol.met2)
m_pol.met2
## rmse mae
## 20.50232 14.37619
Regresi Fungsi Tangga
breaks <- 3:12
best.step2 <- map_dfr(breaks, function(i){
step.met2 <- map_dfr(cross.val2$splits,
function(x){
training <- autodt[x$in_id,]
training$mpg <- cut(training$mpg,i)
mod <- lm(horsepower ~ mpg, data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)), upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
testing <- autodt[-x$in_id,]
newdt <- cut(testing$mpg, c(labs_x_breaks[1,1], labs_x_breaks[,2]))
pred <- predict(mod, newdata=list(mpg=newdt))
truth <- testing$horsepower
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth, response = data_eval$pred)
mae <- mlr3measures::mae(truth = data_eval$truth, response = data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
step.met2
# menghitung rata-rata untuk 10 folds
m_step.met2 <- colMeans(step.met2)
m_step.met2
}
)
best.step2 <- cbind(breaks=breaks, best.step2)
# menampilkan hasil all breaks
best.step2 %>% slice_min(rmse, n=5)
## breaks rmse mae
## 1 10 19.94188 14.00930
## 2 12 20.20257 14.48694
## 3 11 20.23586 14.67526
## 4 9 20.78474 14.44107
## 5 5 21.23067 15.23984
best.step2 %>% slice_min(mae, n=5)
## breaks rmse mae
## 1 10 19.94188 14.00930
## 2 9 20.78474 14.44107
## 3 12 20.20257 14.48694
## 4 11 20.23586 14.67526
## 5 5 21.23067 15.23984
Regresi Spline
spline.met2 <- map_dfr(cross.val2$splits,
function(x){
mod <- lm(horsepower ~ bs(mpg, knots=knots.val.auto),data=autodt[x$in_id,])
pred <- predict(mod, newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$horsepower
rmse <- mlr3measures::rmse(truth = truth,response=pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
m_spline.met2 <- colMeans(spline.met2)
m_spline.met2
## rmse mae
## 19.53639 14.09720
Regresi Natural Spline
nspl.met2 <- map_dfr(cross.val2$splits,
function(x){
mod <- lm(horsepower ~ ns(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
pred <- predict(mod,newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$horsepower
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
m_nspl.met2 <- colMeans(nspl.met2)
m_nspl.met2
## rmse mae
## 19.53694 14.16507
Regresi: LOESS
span <- seq(0.1, 1, length.out=91)
best.loess2 <- map_dfr(span, function(i){
loess.met2 <- map_dfr(cross.val1$splits, function(x){
mod <- loess(horsepower ~ mpg, span=i, data=autodt[x$in_id,])
pred <- predict(mod, newdata=autodt[-x$in_id,])
truth <- autodt[-x$in_id,]$horsepower
data_eval <- na.omit(data.frame(pred=pred, truth=truth))
rmse <- mlr3measures::rmse(truth=data_eval$truth, response=data_eval$pred)
mae <- mlr3measures::mae(truth=data_eval$truth, response=data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
head(loess.met2,20)
#Menghitung rata-rata untuk 10 folds
m_loess.met2 <- colMeans(loess.met2)
m_loess.met2
}
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.7908e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.0291e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6179e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1236e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.4554e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6336e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1277e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
best.loess2 <- cbind(span=span,best.loess2)
best.loess2 %>% slice_min(rmse, n=1)
## span rmse mae
## 1 0.33 19.26358 13.82094
best.loess2 %>% slice_min(mae, n=1)
## span rmse mae
## 1 0.32 19.27623 13.813
Berdasarkan nilai rmse terkecil, best LOESS pada span 0.33, sedangkan best LOESS pada span 0.32 berdasarkan nilai mae terkecil.
Perbandingan Akurasi Model
df.cv2 <- rbind(m_lin.met2, m_pol.met2, best.step2[8,2:3], m_spline.met2, m_nspl.met2, best.loess2[47,-1])
rownames(df.cv2) <- c("Linear", "Polinomial(8)", "Fungsi Tangga(10)", "Spline", "Natural Spline", "Fungsi LOESS(0.32)")
kableExtra::kable(head(df.cv2) ,caption = 'Perbandingan Akurasi Model dengan *Cross Validation Evaluation* (horsepower ~ mpg)')
| rmse | mae | |
|---|---|---|
| Linear | 24.07495 | 18.46422 |
| Polinomial(8) | 20.50232 | 14.37619 |
| Fungsi Tangga(10) | 19.94188 | 14.00930 |
| Spline | 19.53639 | 14.09720 |
| Natural Spline | 19.53694 | 14.16507 |
| Fungsi LOESS(0.32) | 19.26656 | 13.86220 |
Berdasarkan nilai rmse dan mae terkecil, dengan menggunakan cross-validation evaluation 10 folds, model regresi fungsi LOESS dengan span sebesar 0.32 merupakan model terbaik untuk korelasi model horsepower ~ mpg dibandingkan model lainnya.