library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.10
## v tidyr 1.2.1 v stringr 1.4.1
## v readr 2.1.2 v forcats 0.5.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(purrr)
library(rsample) # for cross-validation splits
library(mlr3measures)
## In order to avoid name clashes, do not attach 'mlr3measures'. Instead, only load the namespace with `requireNamespace("mlr3measures")` and access the measures directly via `::`, e.g. `mlr3measures::auc()`.
library(MultiKink)
library(splines)
1.2. Data Description
The data are simulated using set.seed(80): 1,000 observations of data.x are drawn from a normal distribution with mean 1 and standard deviation 1, and a standard-normal error term is added. The response is generated from a second-order polynomial model with the following equation: y = 5 + 2(data.x) + 3(data.x)^2 + error
set.seed(80)
data.x <- rnorm(1000,1,1)
err <- rnorm(1000)
y <- 5+2*data.x+3*data.x^2+err
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), main = "Pemodelan Regresi Linear")
Based on the plot above, the points follow a clearly nonlinear pattern: they do not lie along a straight line. This is expected, because a second-order polynomial traces a curve with a single turning point (here a minimum, since the squared term has a positive coefficient), so the relationship cannot be captured by a straight line.
Linear regression is a statistical method for modelling the relationship between one or more explanatory variables X and a response variable Y. As a baseline, a straight regression line is fitted to the data and added to the plot.
lin.mod <-lm( y~data.x)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), main = "Pemodelan Regresi Linear")
lines(data.x,lin.mod$fitted.values,col="green")
summary(lin.mod)
##
## Call:
## lm(formula = y ~ data.x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.778 -2.671 -1.390 0.993 32.611
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0964 0.2069 24.63 <2e-16 ***
## data.x 7.8286 0.1501 52.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.667 on 998 degrees of freedom
## Multiple R-squared: 0.7316, Adjusted R-squared: 0.7314
## F-statistic: 2721 on 1 and 998 DF, p-value: < 2.2e-16
Based on the output above, the fitted regression line does not follow the pattern of the data, so ordinary linear regression is not an appropriate model here.
pol.mod <- lm( y~data.x+I(data.x^2))
ix <- sort( data.x,index.return=T)$ix
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), main = "Pemodelan Polynomial\nOrdo = 2")
lines(data.x[ix], pol.mod$fitted.values[ix],col="green", cex=2)
summary(pol.mod)
##
## Call:
## lm(formula = y ~ data.x + I(data.x^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1434 -0.6471 0.0309 0.6498 3.4073
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.99016 0.04510 110.64 <2e-16 ***
## data.x 1.97009 0.05277 37.33 <2e-16 ***
## I(data.x^2) 3.03404 0.02145 141.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.017 on 997 degrees of freedom
## Multiple R-squared: 0.9873, Adjusted R-squared: 0.9872
## F-statistic: 3.865e+04 on 2 and 997 DF, p-value: < 2.2e-16
Based on the output above, the fitted curve follows the pattern of the data closely. This is consistent with the data-generating model, which is a second-order polynomial.
# step-function (piecewise constant) regression
range(data.x)
## [1] -2.204434 4.312776
c1 <- as.factor(ifelse(data.x<=0,1,0)) # c1 = 1 when data.x <= 0, 0 otherwise (first interval)
c2 <- as.factor(ifelse(data.x<=2 & data.x>0,1,0)) # c2 = 1 when 0 < data.x <= 2 (second interval)
c3 <- as.factor(ifelse(data.x>2,1,0)) # c3 = 1 when data.x > 2 (third interval)
head(data.frame(y, c1, c2, c3), 10) # c1, c2, c3 act as dummy indicators for the three intervals; x itself no longer appears because it has been converted into these categories (full 1000-row printout truncated here)
##            y c1 c2 c3
## 1   7.460039  0  1  0
## 2  18.720336  0  1  0
## 3  12.234188  0  1  0
## 4  16.793363  0  1  0
## 5  12.880846  0  1  0
## 6   9.382632  0  1  0
## 7  11.786535  0  1  0
## 8   8.332639  0  1  0
## 9   8.074686  0  1  0
## 10 10.547590  0  1  0
step.mod <- lm(y~c1+c2+c3)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), main = "Pemodelan Fungsi Tangga")
lines(data.x,lin.mod$fitted.values,col="red")
lines(data.x[ix], step.mod$fitted.values[ix],col="green")
summary(step.mod)
##
## Call:
## lm(formula = y ~ c1 + c2 + c3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.405 -3.486 -0.654 2.415 40.729
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.6772 0.4673 65.65 <2e-16 ***
## c11 -25.1753 0.6207 -40.56 <2e-16 ***
## c21 -19.6237 0.5088 -38.57 <2e-16 ***
## c31 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.328 on 997 degrees of freedom
## Multiple R-squared: 0.6507, Adjusted R-squared: 0.65
## F-statistic: 928.7 on 2 and 997 DF, p-value: < 2.2e-16
Based on the output above, the green line shows the step function and the red line shows the linear regression fit. The green line has three pieces (breaks): interval 1 covers data.x from -2.20 (the minimum) to 0, interval 2 covers 0 to 2, and interval 3 covers 2 to 4.31 (the maximum). Exploratively, the step function follows the pattern of the data more closely than the linear regression line.
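For reference, the same step function can be fitted more compactly by letting cut() build the interval dummies from the breakpoints 0 and 2; a minimal sketch (step.mod.cut is a name introduced here), which also avoids the redundant third dummy flagged as a singularity above:
# equivalent step-function fit: cut() creates the intervals (-Inf,0], (0,2], (2,Inf]
step.mod.cut <- lm(y ~ cut(data.x, breaks = c(-Inf, 0, 2, Inf)))
summary(step.mod.cut)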
The linear regression, second-order polynomial, and step-function models are then compared by their AIC values.
nilai_AIC <- rbind(AIC(lin.mod),
AIC(pol.mod),
AIC(step.mod))
nama_model <- c("Linear","Poly (ordo=2)","Tangga (breaks=3)")
data.frame(nama_model,nilai_AIC)
## nama_model nilai_AIC
## 1 Linear 5923.089
## 2 Poly (ordo=2) 2877.074
## 3 Tangga (breaks=3) 6188.695
The smallest AIC belongs to the second-order polynomial model (2877.074), so by this criterion the second-order polynomial is the best model.
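As a side note, AIC = 2k - 2*log-likelihood, where k counts every estimated parameter including the residual variance; a quick sketch of that check for the polynomial model:
# sanity check (sketch): reproduce AIC(pol.mod) from its log-likelihood
k <- length(coef(pol.mod)) + 1            # 3 regression coefficients + error variance
2 * k - 2 * as.numeric(logLik(pol.mod))   # should match AIC(pol.mod)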
# lowest MSE = best model
MSE = function(pred,actual){
mean((pred-actual)^2)
}
MSE(predict(lin.mod),y)
## [1] 21.74127
MSE(predict(pol.mod),y)
## [1] 1.031689
MSE(predict(step.mod),y)
## [1] 28.29876
The smallest MSE also belongs to the second-order polynomial model (1.031689), confirming that it is the best of the three models.
dt.all <- cbind(y,data.x)
##knots=1
dt1 <- dt.all[data.x <=1,]
dim(dt1)
## [1] 502 2
dt2 <- dt.all[data.x >1,]
dim(dt2)
## [1] 498 2
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70),col="orange",main = "Pemodelan Piecewise Cubic Polynomial")
cub.mod1 <- lm(dt1[,1]~dt1[,2]+I(dt1[,2]^2)+I(dt1[,2]^3))
ix <- sort(dt1[,2], index.return=T)$ix
lines(dt1[ix,2],cub.mod1$fitted.values[ix],col="blue", lwd=2)
cub.mod2 <- lm(dt2[,1]~dt2[,2]+I(dt2[,2]^2)+I(dt2[,2]^3))
ix <- sort(dt2[,2], index.return=T)$ix
lines(dt2[ix,2],cub.mod2$fitted.values[ix],col="blue", lwd=2)
abline(v=1,col="black",lty=5) # mark the knot at x = 1
The data used for this illustration come from an anthropometric study of 892 females under 50 years of age in three Gambian villages in West Africa. The data set has three columns, age, lntriceps, and triceps, described as follows:
-age : age of the respondent
-lntriceps : logarithm of the triceps skinfold thickness
-triceps : triceps skinfold thickness
The triceps skinfold is needed to calculate the upper-arm muscle circumference. Its thickness gives information about body fat reserves, while the calculated muscle mass gives information about protein reserves.
data("triceps", package="MultiKink") #y=triceps, x=umur
#Jika kita gambarkan dalam bentuk scatterplot
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
theme_bw()
Based on the relationship visible in the scatterplot, we will look for a model that represents this pattern well.
mod_linear = lm(triceps~age,data=triceps)
summary(mod_linear)
##
## Call:
## lm(formula = triceps ~ age, data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9512 -2.3965 -0.5154 1.5822 25.1233
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.19717 0.21244 29.17 <2e-16 ***
## age 0.21584 0.01014 21.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.007 on 890 degrees of freedom
## Multiple R-squared: 0.3372, Adjusted R-squared: 0.3365
## F-statistic: 452.8 on 1 and 890 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55,color="brown",pch=8) +
stat_smooth(method = "lm",
formula = y~x,lty = 1,
col = "blue",se = F)+
theme_bw()+labs(title= "Pemodelan Regresi Linear")+theme(plot.title = element_text(size=14L,face = "bold",
hjust=0.5))
Based on the scatterplot above, the fitted linear regression line does not follow the pattern of the relationship in the data.
mod_polinomial2 = lm(triceps ~ poly(age,2,raw = T),
data=triceps)
summary(mod_polinomial2)
##
## Call:
## lm(formula = triceps ~ poly(age, 2, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5677 -2.4401 -0.4587 1.6368 24.9961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0229191 0.3063806 19.658 < 2e-16 ***
## poly(age, 2, raw = T)1 0.2434733 0.0364403 6.681 4.17e-11 ***
## poly(age, 2, raw = T)2 -0.0006257 0.0007926 -0.789 0.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.008 on 889 degrees of freedom
## Multiple R-squared: 0.3377, Adjusted R-squared: 0.3362
## F-statistic: 226.6 on 2 and 889 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = T)+
theme_bw()
Based on the scatterplot above, the second-order polynomial fit still does not follow the pattern of the relationship in the data.
mod_polinomial3 = lm(triceps ~ poly(age,3,raw = T),data=triceps) # 3 = degree (x^1, x^2, x^3)
summary(mod_polinomial3)
##
## Call:
## lm(formula = triceps ~ poly(age, 3, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5832 -1.9284 -0.5415 1.3283 24.4440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.004e+00 3.831e-01 20.893 < 2e-16 ***
## poly(age, 3, raw = T)1 -3.157e-01 7.721e-02 -4.089 4.73e-05 ***
## poly(age, 3, raw = T)2 3.101e-02 3.964e-03 7.824 1.45e-14 ***
## poly(age, 3, raw = T)3 -4.566e-04 5.612e-05 -8.135 1.38e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.868 on 888 degrees of freedom
## Multiple R-squared: 0.3836, Adjusted R-squared: 0.3815
## F-statistic: 184.2 on 3 and 888 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "blue",se = T)+
theme_bw()
The degree-3 fit looks better: the third-order polynomial follows the pattern in the scatterplot more closely than the linear fit and the second-order polynomial.
AIC(mod_linear)
## [1] 5011.515
AIC(mod_polinomial2)
## [1] 5012.89
AIC(mod_polinomial3)
## [1] 4950.774
aic_triceps <- cbind(AIC(mod_linear),AIC(mod_polinomial2),AIC(mod_polinomial3)) # use a name other than "AIC" so the object does not shadow the AIC() function
The three models are compared by their AIC values; the third-order polynomial is the best model, with the smallest AIC of 4950.774.
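Mirroring the earlier comparison, the three AIC values can also be collected in a small table; a sketch (nama_model_aic is a name introduced here):
nama_model_aic <- c("Linear","Polynomial(2)","Polynomial(3)")
data.frame(nama_model_aic, AIC = c(AIC(mod_linear), AIC(mod_polinomial2), AIC(mod_polinomial3)))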
mod_tangga5 = lm(triceps ~ cut(age,5),data=triceps) # cut(age, 5) splits age into 5 equal-width intervals (a step function with 5 steps)
summary(mod_tangga5)
##
## Call:
## lm(formula = triceps ~ cut(age, 5), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5474 -2.0318 -0.4465 1.3682 23.3759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.2318 0.1994 36.260 < 2e-16 ***
## cut(age, 5)(10.6,20.9] 1.6294 0.3244 5.023 6.16e-07 ***
## cut(age, 5)(20.9,31.2] 5.9923 0.4222 14.192 < 2e-16 ***
## cut(age, 5)(31.2,41.5] 7.5156 0.4506 16.678 < 2e-16 ***
## cut(age, 5)(41.5,51.8] 7.4561 0.5543 13.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.939 on 887 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.3588
## F-statistic: 125.7 on 4 and 887 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()
The scatterplot above shows a step function with 5 intervals: age is divided into 5 intervals of equal width. Based on the scatterplot, the step function only roughly follows the pattern of the data.
mod_tangga7 = lm(triceps ~ cut(age,7),data=triceps)
summary(mod_tangga7)
##
## Call:
## lm(formula = triceps ~ cut(age, 7), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.8063 -1.7592 -0.4366 1.2894 23.1461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.5592 0.2219 34.060 < 2e-16 ***
## cut(age, 7)(7.62,15] -0.6486 0.3326 -1.950 0.0515 .
## cut(age, 7)(15,22.3] 3.4534 0.4239 8.146 1.27e-15 ***
## cut(age, 7)(22.3,29.7] 5.8947 0.4604 12.804 < 2e-16 ***
## cut(age, 7)(29.7,37] 7.8471 0.5249 14.949 < 2e-16 ***
## cut(age, 7)(37,44.4] 6.9191 0.5391 12.835 < 2e-16 ***
## cut(age, 7)(44.4,51.8] 6.3013 0.6560 9.606 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.805 on 885 degrees of freedom
## Multiple R-squared: 0.4055, Adjusted R-squared: 0.4014
## F-statistic: 100.6 on 6 and 885 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,7),
lty = 1, col = "blue",se = F)+
theme_bw()
The scatterplot above shows a step function with 7 intervals: age is divided into 7 intervals of equal width. This 7-interval step function follows the pattern of the data more closely; the fitted level rises over intervals 1 to 5 and falls over intervals 6 and 7.
The five models are then compared by their MSE values.
MSE = function(pred,actual){
mean((pred-actual)^2)
}
nilai_MSE <- rbind(MSE(predict(mod_linear),triceps$triceps),
MSE(predict(mod_polinomial2),triceps$triceps),
MSE(predict(mod_polinomial3),triceps$triceps),
MSE(predict(mod_tangga5),triceps$triceps),
MSE(predict(mod_tangga7),triceps$triceps))
nama_model <- c("Linear","Polynomial(2)", "Polynomial(3)",
"Tangga (Breaks=5)", "Tangga (Breaks=7)")
data.frame(nama_model,nilai_MSE)
## nama_model nilai_MSE
## 1 Linear 16.01758
## 2 Polynomial(2) 16.00636
## 3 Polynomial(3) 14.89621
## 4 Tangga (Breaks=5) 15.42602
## 5 Tangga (Breaks=7) 14.36779
The model with the smallest MSE can be regarded as the one that best represents the pattern of the data in the scatterplot. Here the smallest MSE belongs to the step function with 7 intervals, with an MSE of 14.36779.
Cross-validation (also called rotation estimation) is a model validation technique for assessing how the results of a statistical analysis will generalize to an independent data set. It is mainly used for prediction, to estimate how accurately a predictive model will perform in practice.
4.1 Linear Regression
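Before fitting each model fold by fold, it helps to see what a single rsample split contains; a minimal sketch (ex_split is a name introduced here):
# each rsplit stores the training-row indices in $in_id; the remaining rows form the test fold
ex_split <- vfold_cv(triceps, v = 10)$splits[[1]]
length(ex_split$in_id)                   # number of training rows in fold 1
nrow(triceps) - length(ex_split$in_id)   # number of held-out (test) rows in fold 1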
set.seed(80)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_linear <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ age,data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_linear
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 4.17 3.00
## 2 4.53 3.15
## 3 3.70 2.70
## 4 4.49 2.98
## 5 3.09 2.29
## 6 3.73 2.69
## 7 3.21 2.50
## 8 4.22 3.03
## 9 4.12 3.04
## 10 4.57 2.99
# compute the average over the 10 folds
mean_metric_linear <- colMeans(metric_linear)
mean_metric_linear
## rmse mae
## 3.983055 2.837318
set.seed(80)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_poly2 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ poly(age,2,raw = T),data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly2
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 4.18 3.03
## 2 4.58 3.21
## 3 3.72 2.72
## 4 4.49 2.98
## 5 3.09 2.30
## 6 3.72 2.71
## 7 3.20 2.51
## 8 4.23 3.05
## 9 4.11 3.04
## 10 4.57 3.01
# compute the average over the 10 folds
mean_metric_poly2 <- colMeans(metric_poly2)
mean_metric_poly2
## rmse mae
## 3.988963 2.856220
set.seed(80)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_poly3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(triceps ~ poly(age,3,raw = T),data=triceps[x$in_id,])
pred <- predict(mod,newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly3
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 4.21 2.89
## 2 4.70 3.13
## 3 3.51 2.44
## 4 4.22 2.71
## 5 2.98 2.09
## 6 3.56 2.43
## 7 3.00 2.24
## 8 4.30 3.04
## 9 3.79 2.76
## 10 4.29 2.71
# compute the average over the 10 folds
mean_metric_poly3 <- colMeans(metric_poly3)
mean_metric_poly3
## rmse mae
## 3.855210 2.643331
set.seed(80)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
breaks <- 3:10
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits, # metric = performance measure
function(x){
training <- triceps[x$in_id,]
training$age <- cut(training$age,i)
mod <- lm(triceps ~ age,data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- triceps[-x$in_id,]
age_new <- cut(testing$age,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,newdata=list(age=age_new))
truth <- testing$triceps
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,response = data_eval$pred)
mae <- mlr3measures::mae(truth = data_eval$truth,response = data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# compute the average over the 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
}
)
best_tangga <- cbind(breaks=breaks,best_tangga)
# show the results for all break counts
best_tangga
## breaks rmse mae
## 1 3 3.832511 2.623246
## 2 4 3.899563 2.665776
## 3 5 3.925976 2.731358
## 4 6 3.831727 2.628047
## 5 7 3.804444 2.564559
## 6 8 3.825035 2.565173
## 7 9 3.816675 2.541445
## 8 10 3.834900 2.555568
# best number of breaks by RMSE
best_tangga %>% slice_min(rmse)
## breaks rmse mae
## 1 7 3.804444 2.564559
# best number of breaks by MAE
best_tangga %>% slice_min(mae)
## breaks rmse mae
## 1 9 3.816675 2.541445
nilai_metric <- rbind(mean_metric_linear,
mean_metric_poly2,
mean_metric_poly3,
best_tangga %>% select(-1) %>% slice_min(mae))
nama_model <- c("Linear","Polynomial(2)","Polynomial(3)","Tangga (Breaks=9)")
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Linear 3.983055 2.837318
## 2 Polynomial(2) 3.988963 2.856220
## 3 Polynomial(3) 3.855210 2.643331
## 4 Tangga (Breaks=9) 3.816675 2.541445
Based on the cross-validation results, the step-function model with 9 breaks is the best model: its RMSE (3.816675) and MAE (2.541445) are smaller than those of the other three models.
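The selected model can then be refitted on the full data set for interpretation; a sketch assuming the 9-break step function chosen above (mod_tangga9 is a name introduced here):
# refit the winning step-function model on all observations (sketch)
mod_tangga9 = lm(triceps ~ cut(age,9),data=triceps)
summary(mod_tangga9)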
The data used next are again simulated with set.seed(80): 1,000 values of data.x are drawn from a normal distribution with mean 1 and standard deviation 1, and the error term is drawn from a normal distribution with mean 0 and standard deviation 5. The data are split into two parts: data.x less than or equal to 1 and data.x greater than 1.
set.seed(80)
data.x <- rnorm(1000,1,1)
err <- rnorm(1000,0,5)
y <- 5+2*data.x+3*data.x^2+err
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="blue")
abline(v=1, col="red", lty=2)
dt.all <- cbind(y,data.x)
##knots=1
dt1 <- dt.all[data.x <=1,]
dim(dt1)
## [1] 502 2
There are 502 observations with data.x less than or equal to 1.
dt2 <- dt.all[data.x >1,]
dim(dt2)
## [1] 498 2
There are 498 observations with data.x greater than 1.
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="blue")
cub.mod1 <- lm(dt1[,1]~dt1[,2]+I(dt1[,2]^2)+I(dt1[,2]^3))
ix <- sort(dt1[,2], index.return=T)$ix
lines(dt1[ix,2],cub.mod1$fitted.values[ix],col="red", lwd=2)
cub.mod2 <- lm(dt2[,1]~dt2[,2]+I(dt2[,2]^2)+I(dt2[,2]^3))
ix <- sort(dt2[,2], index.return=T)$ix
lines(dt2[ix,2],cub.mod2$fitted.values[ix],col="red", lwd=2)
The plot above shows that the two fitted cubic curves do not join smoothly at data.x = 1 (the knot).
To obtain a fit that is smooth at data.x = 1, the piecewise cubic is refitted using a truncated power basis.
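The degree-3 truncated power basis at a knot xi is h(x) = (x - xi)^3 for x > xi and 0 otherwise; a small helper (a sketch; tpb3 is a name introduced here) makes the construction used below explicit:
# truncated cubic power basis at knot xi (sketch); hx below equals tpb3(data.x, 1)
tpb3 <- function(x, xi) ifelse(x > xi, (x - xi)^3, 0)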
# using the truncated power basis
plot(data.x,y,xlim=c( -2,5), ylim=c( -10,70), pch=16,col="blue")
abline(v=1,col="red", lty=2)
hx <- ifelse( data.x>1,(data.x-1)^3,0)
cubspline.mod <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix], cubspline.mod$fitted.values[ix],col="green", lwd=2)
The data are then divided into three regions using knots at 0 and 2: data.x < 0, 0 < data.x < 2, and data.x > 2.
plot( data.x,y,xlim=c(-2,5), ylim=c( -10,70), pch=16,col="blue")
abline(v=0,col="red", lty=2)
abline(v=2,col="red", lty=2)
hx1 <- ifelse( data.x>0,(data.x-0)^3,0)
hx2 <- ifelse( data.x>2,(data.x-2)^3,0)
cubspline.mod2 <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx1+hx2)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix],cubspline.mod2$fitted.values[ix],col="green", lwd=2)
data.all <- cbind( y,data.x,hx)
set.seed(100)
ind <- sample(1:5,length( data.x),replace=T)
res <- c()
for( i in 1:5){
dt.train <- data.all[ ind!=i,]
x.test <- data.all[ ind==i, -1]
y.test <- data.all[ ind==i,1]
mod1 <- lm( dt.train[,1]~dt.train[,2]+I( dt.train[,2]^2)+I( dt.train[,2]^3)+dt.train[,3])
x.test.olah <- cbind(1,x.test[,1], x.test[,1]^2,x.test[,1]^3,x.test[,2])
beta <- coefficients(mod1)
prediksi <- x.test.olah%*%beta
res <- c( res,mean(( y.test-prediksi)^2))
}
res
## [1] 28.02283 25.37608 23.36758 23.24525 29.98228
mean(res)
## [1] 25.9988
data.all2 <- cbind(y,data.x,hx1,hx2)
set.seed(100)
ind2 <- sample(1:5,length( data.x),replace=T)
res2 <- c()
for( i in 1:5){
dt.train2 <- data.all2[ind2!=i,]
x.test2 <- data.all2[ind2==i,-1]
y.test2 <- data.all2[ind2==i,1]
mod2 <- lm(dt.train2[,1]~dt.train2[,2]+I(dt.train2[,2]^2)+I(dt.train2[,2]^3)+dt.train2[,3]+dt.train2[,4])
x.test.olah2 <- cbind(1,x.test2[,1],x.test2[,1]^2,x.test2[,1]^3,x.test2[,2],x.test2[,3])
beta2 <- coefficients(mod2)
prediksi2 <- x.test.olah2%*%beta2
res2 <- c(res2,mean((y.test2-prediksi2)^2))
}
res2
## [1] 28.10684 25.41501 23.31354 23.27186 29.95851
mean(res2)
## [1] 26.01315
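Putting the two 5-fold cross-validation results side by side (a sketch using the averages computed above):
# compare the cross-validated MSE of the two truncated-power-basis splines (sketch)
data.frame(model = c("Cubic spline, 1 knot (x = 1)", "Cubic spline, 2 knots (x = 0, 2)"),
           cv_mse = c(mean(res), mean(res2)))
With this simulation the single-knot spline has a slightly lower cross-validated MSE (about 26.00 versus 26.01).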
ss1 <- smooth.spline(data.x,y,all.knots = T)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,80), pch=16,col="orange")
lines(ss1,col="purple", lwd=2)
# determine the number of basis functions and knots
dim(bs(triceps$age, knots = c(10, 20,40)))
## [1] 892 6
# or
dim(bs(triceps$age, df=6))
## [1] 892 6
# knot values chosen automatically by the computer (quantiles of age)
attr(bs(triceps$age, df=6),"knots")
## 25% 50% 75%
## 5.5475 12.2100 24.7275
Using manually specified knots
knot_value_manual_3 = c(10, 20,40)
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_manual_3 ),data=triceps)
summary(mod_spline_1)
##
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_manual_3),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.385 -1.682 -0.393 1.165 22.855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.22533 0.61873 13.294 < 2e-16 ***
## bs(age, knots = knot_value_manual_3)1 -0.06551 1.14972 -0.057 0.955
## bs(age, knots = knot_value_manual_3)2 -4.30051 0.72301 -5.948 3.90e-09 ***
## bs(age, knots = knot_value_manual_3)3 7.80435 1.17793 6.625 6.00e-11 ***
## bs(age, knots = knot_value_manual_3)4 6.14890 1.27439 4.825 1.65e-06 ***
## bs(age, knots = knot_value_manual_3)5 5.56640 1.42225 3.914 9.78e-05 ***
## bs(age, knots = knot_value_manual_3)6 7.90178 1.54514 5.114 3.87e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.76 on 885 degrees of freedom
## Multiple R-squared: 0.4195, Adjusted R-squared: 0.4155
## F-statistic: 106.6 on 6 and 885 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = knot_value_manual_3),
lty = 1,se = F)
knot_value_pc_df_6 = attr(bs(triceps$age, df=6),"knots")
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_pc_df_6 ),data=triceps)
summary(mod_spline_1)
##
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_pc_df_6), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0056 -1.7556 -0.2944 1.2008 23.0695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5925 0.8770 7.517 1.37e-13 ***
## bs(age, knots = knot_value_pc_df_6)1 3.7961 1.5015 2.528 0.01164 *
## bs(age, knots = knot_value_pc_df_6)2 -2.0749 0.8884 -2.335 0.01974 *
## bs(age, knots = knot_value_pc_df_6)3 1.5139 1.1645 1.300 0.19391
## bs(age, knots = knot_value_pc_df_6)4 11.6394 1.3144 8.855 < 2e-16 ***
## bs(age, knots = knot_value_pc_df_6)5 5.9680 1.5602 3.825 0.00014 ***
## bs(age, knots = knot_value_pc_df_6)6 8.9127 1.4053 6.342 3.60e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.757 on 885 degrees of freedom
## Multiple R-squared: 0.4206, Adjusted R-squared: 0.4167
## F-statistic: 107.1 on 6 and 885 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = knot_value_pc_df_6),
lty = 1,se = F)
knot_value_manual_3 = c(10, 20,40)
mod_spline3ns = lm(triceps ~ ns(age, knots = knot_value_manual_3),data=triceps)
summary(mod_spline3ns)
##
## Call:
## lm(formula = triceps ~ ns(age, knots = knot_value_manual_3),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7220 -1.7640 -0.3985 1.1908 22.9684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.8748 0.3742 23.714 < 2e-16 ***
## ns(age, knots = knot_value_manual_3)1 7.0119 0.6728 10.422 < 2e-16 ***
## ns(age, knots = knot_value_manual_3)2 6.0762 0.8625 7.045 3.72e-12 ***
## ns(age, knots = knot_value_manual_3)3 2.0780 1.0632 1.954 0.051 .
## ns(age, knots = knot_value_manual_3)4 8.8616 0.9930 8.924 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.762 on 887 degrees of freedom
## Multiple R-squared: 0.4176, Adjusted R-squared: 0.415
## F-statistic: 159 on 4 and 887 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = knot_value_manual_3),
lty = 1,se=F)
model_sms <- with(data = triceps,smooth.spline(age,triceps))
model_sms
## Call:
## smooth.spline(x = age, y = triceps)
##
## Smoothing Parameter spar= 0.5162704 lambda= 1.232259e-06 (13 iterations)
## Equivalent Degrees of Freedom (Df): 50.00639
## Penalized Criterion (RSS): 8591.581
## GCV: 13.77835
pred_data <- broom::augment(model_sms)
ggplot(pred_data,aes(x=x,y=y))+
geom_point(alpha=0.55, color="black")+
geom_line(aes(y=.fitted),col="blue",
lty=1)+
xlab("age")+
ylab("triceps")+
theme_bw()
model_sms_lambda <- data.frame(lambda=seq(0,5,by=0.5)) %>%
group_by(lambda) %>%
do(broom::augment(with(data = triceps,smooth.spline(age,triceps,lambda = .$lambda))))
p <- ggplot(model_sms_lambda,
aes(x=x,y=y))+
geom_line(aes(y=.fitted),
col="blue",
lty=1
)+
facet_wrap(~lambda)
p
model_sms <- with(data = triceps,smooth.spline(age,triceps,df=7))
model_sms
## Call:
## smooth.spline(x = age, y = triceps, df = 7)
##
## Smoothing Parameter spar= 1.049874 lambda= 0.008827582 (11 iterations)
## Equivalent Degrees of Freedom (Df): 7.000747
## Penalized Criterion (RSS): 10112.16
## GCV: 14.20355
pred_data <- broom::augment(model_sms)
ggplot(pred_data,aes(x=x,y=y))+
geom_point(alpha=0.55, color="black")+
geom_line(aes(y=.fitted),col="blue",
lty=1)+
xlab("age")+
ylab("triceps")+
theme_bw()
model_loess <- loess(triceps~age,
data = triceps)
summary(model_loess)
## Call:
## loess(formula = triceps ~ age, data = triceps)
##
## Number of Observations: 892
## Equivalent Number of Parameters: 4.6
## Residual Standard Error: 3.777
## Trace of smoother matrix: 5.01 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
model_loess_span <- data.frame(span=seq(0.1,5,by=0.5)) %>%
group_by(span) %>%
do(broom::augment(loess(triceps~age,
data = triceps,span=.$span)))
p2 <- ggplot(model_loess_span,
aes(x=age,y=triceps))+
geom_line(aes(y=.fitted),
col="blue",
lty=1
)+
facet_wrap(~span)
p2
library(ggplot2)
ggplot(triceps, aes(age,triceps)) +
geom_point(alpha=0.5,color="black") +
stat_smooth(method='loess',
formula=y~x,
span = 0.75,
col="blue",
lty=1,
se=F)
set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
span <- seq(0.1,1,length.out=50)
best_loess <- map_dfr(span, function(i){
metric_loess <- map_dfr(cross_val$splits,
function(x){
mod <- loess(triceps ~ age,span = i,
data=triceps[x$in_id,])
pred <- predict(mod,
newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps
data_eval <- na.omit(data.frame(pred=pred,
truth=truth))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
head(metric_loess,20)
# compute the average over the 10 folds
mean_metric_loess <- colMeans(metric_loess)
mean_metric_loess
}
)
best_loess <- cbind(span=span,best_loess)
# show the results for all span values
best_loess
## span rmse mae
## 1 0.1000000 3.773822 2.495752
## 2 0.1183673 3.771342 2.493506
## 3 0.1367347 3.766450 2.487557
## 4 0.1551020 3.757385 2.480571
## 5 0.1734694 3.749539 2.473494
## 6 0.1918367 3.746288 2.470915
## 7 0.2102041 3.742741 2.467990
## 8 0.2285714 3.740397 2.465388
## 9 0.2469388 3.737724 2.463311
## 10 0.2653061 3.737105 2.462252
## 11 0.2836735 3.735645 2.460514
## 12 0.3020408 3.734522 2.459225
## 13 0.3204082 3.733298 2.457694
## 14 0.3387755 3.732538 2.456280
## 15 0.3571429 3.732082 2.455452
## 16 0.3755102 3.731261 2.454642
## 17 0.3938776 3.730882 2.454293
## 18 0.4122449 3.730585 2.454291
## 19 0.4306122 3.730351 2.454869
## 20 0.4489796 3.730717 2.456222
## 21 0.4673469 3.730831 2.456768
## 22 0.4857143 3.731387 2.458307
## 23 0.5040816 3.732091 2.460189
## 24 0.5224490 3.732392 2.461811
## 25 0.5408163 3.733609 2.464543
## 26 0.5591837 3.734393 2.467180
## 27 0.5775510 3.735554 2.470517
## 28 0.5959184 3.736679 2.473012
## 29 0.6142857 3.738227 2.476519
## 30 0.6326531 3.741401 2.476887
## 31 0.6510204 3.742660 2.479842
## 32 0.6693878 3.744336 2.483581
## 33 0.6877551 3.746651 2.488792
## 34 0.7061224 3.748708 2.492180
## 35 0.7244898 3.750734 2.495218
## 36 0.7428571 3.752815 2.498359
## 37 0.7612245 3.753998 2.504175
## 38 0.7795918 3.755981 2.511421
## 39 0.7979592 3.756794 2.514375
## 40 0.8163265 3.759456 2.522994
## 41 0.8346939 3.767892 2.543250
## 42 0.8530612 3.777168 2.557863
## 43 0.8714286 3.782407 2.566972
## 44 0.8897959 3.788428 2.578042
## 45 0.9081633 3.797002 2.595698
## 46 0.9265306 3.803259 2.609852
## 47 0.9448980 3.809478 2.623386
## 48 0.9632653 3.824192 2.655691
## 49 0.9816327 3.835596 2.679155
## 50 1.0000000 3.860514 2.718911
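As with the step-function breaks, the span with the lowest cross-validated error can be extracted; a sketch mirroring the earlier slice_min() calls:
# best span by RMSE
best_loess %>% slice_min(rmse)
# best span by MAE
best_loess %>% slice_min(mae)
From the table above, the minimum RMSE occurs around span 0.43 and the minimum MAE around span 0.41.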
library(ISLR)
AutoData = Auto %>% select(mpg, horsepower, origin)
tibble(AutoData)
## # A tibble: 392 x 3
## mpg horsepower origin
## <dbl> <dbl> <dbl>
## 1 18 130 1
## 2 15 165 1
## 3 18 150 1
## 4 16 150 1
## 5 17 140 1
## 6 15 198 1
## 7 14 220 1
## 8 14 215 1
## 9 14 225 1
## 10 15 190 1
## # ... with 382 more rows
plot(AutoData$mpg, AutoData$horsepower,col="blue")
The scatterplot above shows that the relationship between mpg and horsepower is not linear: the pattern of points is curved rather than straight. Several models will therefore be tried until one adequately represents the pattern of the relationship.
lin.mod2 <-lm(AutoData$horsepower~AutoData$mpg)
plot(AutoData$mpg,AutoData$horsepower)
lines(AutoData$mpg,lin.mod2$fitted.values,col="Blue")
summary(lin.mod2)
##
## Call:
## lm(formula = AutoData$horsepower ~ AutoData$mpg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.892 -15.716 -2.094 13.108 96.947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 194.4756 3.8732 50.21 <2e-16 ***
## AutoData$mpg -3.8389 0.1568 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.19 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
The linear fit, shown by the blue line, does not yet follow the pattern of the data.
pol.mod2 <- lm(AutoData$horsepower~AutoData$mpg+I(AutoData$mpg^2))
ix2 <- sort(AutoData$mpg,index.return=T)$ix
plot(AutoData$mpg,AutoData$horsepower)
lines(AutoData$mpg[ix2], pol.mod2$fitted.values[ix2],col="red", cex=2)
The second-order polynomial fit, shown by the red line, follows the pattern of the data: the fitted curve bends and decreases in the same way as the points.
summary(pol.mod2)
##
## Call:
## lm(formula = AutoData$horsepower ~ AutoData$mpg + I(AutoData$mpg^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -72.52 -11.24 -0.11 9.47 92.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 302.06824 9.25689 32.63 <2e-16 ***
## AutoData$mpg -13.32406 0.77456 -17.20 <2e-16 ***
## I(AutoData$mpg^2) 0.18804 0.01513 12.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.49 on 389 degrees of freedom
## Multiple R-squared: 0.718, Adjusted R-squared: 0.7165
## F-statistic: 495.1 on 2 and 389 DF, p-value: < 2.2e-16
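The curve above is drawn by sorting the observed mpg values. A minimal sketch of an equivalent order-2 fit, assuming a refit with a data argument (pol.mod2b and mpg.grid are illustrative names) so that predict() can evaluate the curve on an evenly spaced grid:
#equivalent order-2 polynomial via poly(); the data argument lets predict() work on new data
pol.mod2b <- lm(horsepower ~ poly(mpg, 2, raw = TRUE), data = AutoData)
mpg.grid <- data.frame(mpg = seq(min(AutoData$mpg), max(AutoData$mpg), length.out = 200))
plot(AutoData$mpg, AutoData$horsepower)
lines(mpg.grid$mpg, predict(pol.mod2b, newdata = mpg.grid), col = "red") #smooth curve over the grid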
range(AutoData$mpg)
## [1] 9.0 46.6
#step-function dummies: d1 = mpg <= 20, d2 = 20 < mpg <= 30, d3 = mpg > 30
d1 <- as.factor(ifelse(AutoData$mpg<=20,1,0))
d2 <- as.factor(ifelse(AutoData$mpg<=30 & AutoData$mpg>20,1,0))
d3 <- as.factor(ifelse(AutoData$mpg>30,1,0))
data.frame(AutoData$horsepower,d1,d2,d3)
## AutoData.horsepower d1 d2 d3
## 1 130 1 0 0
## 2 165 1 0 0
## 3 150 1 0 0
## 4 150 1 0 0
## 5 140 1 0 0
## 6 198 1 0 0
## 7 220 1 0 0
## 8 215 1 0 0
## 9 225 1 0 0
## 10 190 1 0 0
## 11 170 1 0 0
## 12 160 1 0 0
## 13 150 1 0 0
## 14 225 1 0 0
## 15 95 0 1 0
## 16 95 0 1 0
## 17 97 1 0 0
## 18 85 0 1 0
## 19 88 0 1 0
## 20 46 0 1 0
## 21 87 0 1 0
## 22 90 0 1 0
## 23 95 0 1 0
## 24 113 0 1 0
## 25 90 0 1 0
## 26 215 1 0 0
## 27 200 1 0 0
## 28 210 1 0 0
## 29 193 1 0 0
## 30 88 0 1 0
## 31 90 0 1 0
## 32 95 0 1 0
## 33 100 1 0 0
## 34 105 1 0 0
## 35 100 1 0 0
## 36 88 1 0 0
## 37 100 1 0 0
## 38 165 1 0 0
## 39 175 1 0 0
## 40 153 1 0 0
## 41 150 1 0 0
## 42 180 1 0 0
## 43 170 1 0 0
## 44 175 1 0 0
## 45 110 1 0 0
## 46 72 0 1 0
## 47 100 1 0 0
## 48 88 1 0 0
## 49 86 0 1 0
## 50 90 0 1 0
## 51 70 0 1 0
## 52 76 0 1 0
## 53 65 0 0 1
## 54 69 0 0 1
## 55 60 0 1 0
## 56 70 0 1 0
## 57 95 0 1 0
## 58 80 0 1 0
## 59 54 0 1 0
## 60 90 1 0 0
## 61 86 0 1 0
## 62 165 1 0 0
## 63 175 1 0 0
## 64 150 1 0 0
## 65 153 1 0 0
## 66 150 1 0 0
## 67 208 1 0 0
## 68 155 1 0 0
## 69 160 1 0 0
## 70 190 1 0 0
## 71 97 1 0 0
## 72 150 1 0 0
## 73 130 1 0 0
## 74 140 1 0 0
## 75 150 1 0 0
## 76 112 1 0 0
## 77 76 0 1 0
## 78 87 0 1 0
## 79 69 0 1 0
## 80 86 0 1 0
## 81 92 0 1 0
## 82 97 0 1 0
## 83 80 0 1 0
## 84 88 0 1 0
## 85 175 1 0 0
## 86 150 1 0 0
## 87 145 1 0 0
## 88 137 1 0 0
## 89 150 1 0 0
## 90 198 1 0 0
## 91 150 1 0 0
## 92 158 1 0 0
## 93 150 1 0 0
## 94 215 1 0 0
## 95 225 1 0 0
## 96 175 1 0 0
## 97 105 1 0 0
## 98 100 1 0 0
## 99 100 1 0 0
## 100 88 1 0 0
## 101 95 0 1 0
## 102 46 0 1 0
## 103 150 1 0 0
## 104 167 1 0 0
## 105 170 1 0 0
## 106 180 1 0 0
## 107 100 1 0 0
## 108 88 1 0 0
## 109 72 0 1 0
## 110 94 0 1 0
## 111 90 1 0 0
## 112 85 1 0 0
## 113 107 0 1 0
## 114 90 0 1 0
## 115 145 1 0 0
## 116 230 1 0 0
## 117 49 0 1 0
## 118 75 0 1 0
## 119 91 1 0 0
## 120 112 1 0 0
## 121 150 1 0 0
## 122 110 0 1 0
## 123 122 1 0 0
## 124 180 1 0 0
## 125 95 1 0 0
## 126 100 1 0 0
## 127 100 1 0 0
## 128 67 0 0 1
## 129 80 0 1 0
## 130 65 0 0 1
## 131 75 0 1 0
## 132 100 1 0 0
## 133 110 1 0 0
## 134 105 1 0 0
## 135 140 1 0 0
## 136 150 1 0 0
## 137 150 1 0 0
## 138 140 1 0 0
## 139 150 1 0 0
## 140 83 0 1 0
## 141 67 0 1 0
## 142 78 0 1 0
## 143 52 0 0 1
## 144 61 0 0 1
## 145 75 0 1 0
## 146 75 0 1 0
## 147 75 0 1 0
## 148 97 0 1 0
## 149 93 0 1 0
## 150 67 0 0 1
## 151 95 1 0 0
## 152 105 1 0 0
## 153 72 1 0 0
## 154 72 1 0 0
## 155 170 1 0 0
## 156 145 1 0 0
## 157 150 1 0 0
## 158 148 1 0 0
## 159 110 1 0 0
## 160 105 1 0 0
## 161 110 1 0 0
## 162 95 1 0 0
## 163 110 0 1 0
## 164 110 1 0 0
## 165 129 1 0 0
## 166 75 0 1 0
## 167 83 0 1 0
## 168 100 1 0 0
## 169 78 0 1 0
## 170 96 0 1 0
## 171 71 0 1 0
## 172 97 0 1 0
## 173 97 1 0 0
## 174 70 0 1 0
## 175 90 1 0 0
## 176 95 0 1 0
## 177 88 0 1 0
## 178 98 0 1 0
## 179 115 0 1 0
## 180 53 0 0 1
## 181 86 0 1 0
## 182 81 0 1 0
## 183 92 0 1 0
## 184 79 0 1 0
## 185 83 0 1 0
## 186 140 1 0 0
## 187 150 1 0 0
## 188 120 1 0 0
## 189 152 1 0 0
## 190 100 0 1 0
## 191 105 0 1 0
## 192 81 0 1 0
## 193 90 0 1 0
## 194 52 0 1 0
## 195 60 0 1 0
## 196 70 0 1 0
## 197 53 0 0 1
## 198 100 1 0 0
## 199 78 1 0 0
## 200 110 1 0 0
## 201 95 1 0 0
## 202 71 0 1 0
## 203 70 0 0 1
## 204 75 0 1 0
## 205 72 0 1 0
## 206 102 1 0 0
## 207 150 1 0 0
## 208 88 1 0 0
## 209 108 1 0 0
## 210 120 1 0 0
## 211 180 1 0 0
## 212 145 1 0 0
## 213 130 1 0 0
## 214 150 1 0 0
## 215 68 0 0 1
## 216 80 0 1 0
## 217 58 0 0 1
## 218 96 0 1 0
## 219 70 0 0 1
## 220 145 1 0 0
## 221 110 1 0 0
## 222 145 1 0 0
## 223 130 1 0 0
## 224 110 1 0 0
## 225 105 0 1 0
## 226 100 1 0 0
## 227 98 1 0 0
## 228 180 1 0 0
## 229 170 1 0 0
## 230 190 1 0 0
## 231 149 1 0 0
## 232 78 0 1 0
## 233 88 0 1 0
## 234 75 0 1 0
## 235 89 0 1 0
## 236 63 0 0 1
## 237 83 0 0 1
## 238 67 0 1 0
## 239 78 0 0 1
## 240 97 0 1 0
## 241 110 0 1 0
## 242 110 0 1 0
## 243 48 0 0 1
## 244 66 0 0 1
## 245 52 0 0 1
## 246 70 0 0 1
## 247 60 0 0 1
## 248 110 1 0 0
## 249 140 1 0 0
## 250 139 0 1 0
## 251 105 1 0 0
## 252 95 0 1 0
## 253 85 0 1 0
## 254 88 0 1 0
## 255 100 0 1 0
## 256 90 1 0 0
## 257 105 0 1 0
## 258 85 0 1 0
## 259 110 1 0 0
## 260 120 1 0 0
## 261 145 1 0 0
## 262 165 1 0 0
## 263 139 1 0 0
## 264 140 1 0 0
## 265 68 0 1 0
## 266 95 0 1 0
## 267 97 0 1 0
## 268 75 0 0 1
## 269 95 0 1 0
## 270 105 0 1 0
## 271 85 0 1 0
## 272 97 0 1 0
## 273 103 0 1 0
## 274 125 1 0 0
## 275 115 0 1 0
## 276 133 1 0 0
## 277 71 0 0 1
## 278 68 0 1 0
## 279 115 0 1 0
## 280 85 1 0 0
## 281 88 0 1 0
## 282 90 0 1 0
## 283 110 0 1 0
## 284 130 1 0 0
## 285 129 1 0 0
## 286 138 1 0 0
## 287 135 1 0 0
## 288 155 1 0 0
## 289 142 1 0 0
## 290 125 1 0 0
## 291 150 1 0 0
## 292 71 0 0 1
## 293 65 0 0 1
## 294 80 0 0 1
## 295 80 0 1 0
## 296 77 0 1 0
## 297 125 0 1 0
## 298 71 0 1 0
## 299 90 0 1 0
## 300 70 0 0 1
## 301 70 0 0 1
## 302 65 0 0 1
## 303 69 0 0 1
## 304 90 0 1 0
## 305 115 0 1 0
## 306 115 0 1 0
## 307 90 0 0 1
## 308 76 0 0 1
## 309 60 0 0 1
## 310 70 0 0 1
## 311 65 0 0 1
## 312 90 0 1 0
## 313 88 0 1 0
## 314 90 0 1 0
## 315 90 1 0 0
## 316 78 0 0 1
## 317 90 0 1 0
## 318 75 0 0 1
## 319 92 0 0 1
## 320 75 0 0 1
## 321 65 0 0 1
## 322 105 0 1 0
## 323 65 0 0 1
## 324 48 0 0 1
## 325 48 0 0 1
## 326 67 0 0 1
## 327 67 0 1 0
## 328 67 0 0 1
## 329 67 0 0 1
## 330 62 0 1 0
## 331 132 0 0 1
## 332 100 0 1 0
## 333 88 0 0 1
## 334 72 0 0 1
## 335 84 0 1 0
## 336 84 0 1 0
## 337 92 0 1 0
## 338 110 0 1 0
## 339 84 0 1 0
## 340 58 0 0 1
## 341 64 0 0 1
## 342 60 0 0 1
## 343 67 0 0 1
## 344 65 0 0 1
## 345 62 0 0 1
## 346 68 0 0 1
## 347 63 0 0 1
## 348 65 0 0 1
## 349 65 0 1 0
## 350 74 0 0 1
## 351 75 0 0 1
## 352 75 0 0 1
## 353 100 0 0 1
## 354 74 0 0 1
## 355 80 0 1 0
## 356 76 0 0 1
## 357 116 0 1 0
## 358 120 0 1 0
## 359 110 0 1 0
## 360 105 0 1 0
## 361 88 0 1 0
## 362 85 1 0 0
## 363 88 0 1 0
## 364 88 0 1 0
## 365 88 0 0 1
## 366 85 0 0 1
## 367 84 0 1 0
## 368 90 0 1 0
## 369 92 0 1 0
## 370 74 0 0 1
## 371 68 0 0 1
## 372 68 0 0 1
## 373 63 0 0 1
## 374 70 0 0 1
## 375 88 0 0 1
## 376 75 0 0 1
## 377 70 0 0 1
## 378 67 0 0 1
## 379 67 0 0 1
## 380 67 0 0 1
## 381 110 0 1 0
## 382 85 0 0 1
## 383 92 0 1 0
## 384 112 0 1 0
## 385 96 0 0 1
## 386 84 0 0 1
## 387 90 0 1 0
## 388 86 0 1 0
## 389 52 0 0 1
## 390 84 0 0 1
## 391 79 0 1 0
## 392 82 0 0 1
step.mod2 <- lm(AutoData$horsepower~d1+d2+d3)
plot(AutoData$mpg,AutoData$horsepower)
lines(AutoData$mpg,lin.mod2$fitted.values,col="red")
lines(AutoData$mpg[ix2], step.mod2$fitted.values[ix2],col="blue")
summary(step.mod2)
##
## Call:
## lm(formula = AutoData$horsepower ~ d1 + d2 + d3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65.450 -12.966 0.034 12.550 92.550
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.518 2.886 24.431 < 2e-16 ***
## d11 66.932 3.557 18.816 < 2e-16 ***
## d21 17.448 3.602 4.844 1.84e-06 ***
## d31 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.3 on 389 degrees of freedom
## Multiple R-squared: 0.5356, Adjusted R-squared: 0.5332
## F-statistic: 224.3 on 2 and 389 DF, p-value: < 2.2e-16
The step function with three intervals, shown by the blue line, follows the data only partially: it tracks the pattern reasonably well where AutoData$mpg is above 20, but for mpg below 20 it does not yet capture the relationship.
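The "(1 not defined because of singularities)" message appears because d1, d2 and d3 always sum to one, so together with the intercept they are linearly dependent and lm() drops d3. A minimal sketch of the same step function using cut(), which builds the three intervals as a single factor so lm() handles the dummy coding with a reference level (mpg.bins and step.mod.cut are illustrative names):
#same three bins as d1 (mpg <= 20), d2 (20 < mpg <= 30), d3 (mpg > 30)
mpg.bins <- cut(AutoData$mpg, breaks = c(-Inf, 20, 30, Inf))
step.mod.cut <- lm(AutoData$horsepower ~ mpg.bins)
summary(step.mod.cut) #same fitted values as step.mod2, without the singularity message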
nilai_AIC <- rbind(AIC(lin.mod2),
AIC(pol.mod2),
AIC(step.mod2))
nama_model <- c("Linear","Poly (ordo=2)","Tangga (breaks=3)")
data.frame(nama_model,nilai_AIC)
## nama_model nilai_AIC
## 1 Linear 3614.324
## 2 Poly (ordo=2) 3485.217
## 3 Tangga (breaks=3) 3680.688
The model with the lowest AIC is taken to be the one that best represents the relationship in the data. Comparing the AIC values of the three models, the order-2 polynomial has the lowest AIC, 3485.217, so the order-2 polynomial is the model that best represents the mpg-horsepower relationship.
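As a sanity check, AIC() in R equals minus twice the log-likelihood plus twice the number of estimated parameters (the coefficients plus the residual variance); a short sketch reproducing the value for the polynomial model:
#AIC = -2*logLik + 2*k, with k = attr(logLik, "df")
ll <- logLik(pol.mod2)
as.numeric(-2 * ll + 2 * attr(ll, "df")) #should equal AIC(pol.mod2) = 3485.217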
plot(AutoData$mpg,AutoData$horsepower)
lines(AutoData$mpg,lin.mod2$fitted.values,col="orange")
lines(AutoData$mpg[ix2], pol.mod2$fitted.values[ix2],col="green", cex=2)
lines(AutoData$mpg[ix2], step.mod2$fitted.values[ix2],col="blue")
Exploratively, the fitted curve that best represents the relationship in the data is the green one, i.e. the order-2 polynomial fit.
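Because the comparison plot has no legend, a small sketch of the same figure with a legend (the linear fit is also drawn on mpg sorted by ix2):
#overlay the three fitted curves and label them
plot(AutoData$mpg, AutoData$horsepower, xlab = "mpg", ylab = "horsepower")
lines(AutoData$mpg[ix2], lin.mod2$fitted.values[ix2], col = "orange")
lines(AutoData$mpg[ix2], pol.mod2$fitted.values[ix2], col = "green")
lines(AutoData$mpg[ix2], step.mod2$fitted.values[ix2], col = "blue")
legend("topright", legend = c("Linear", "Poly (ordo=2)", "Tangga (breaks=3)"),
       col = c("orange", "green", "blue"), lty = 1)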
#mean squared error between predicted and observed values
MSE2 = function(pred, actual){
  mean((pred - actual)^2)
}
MSE2(predict(lin.mod2),AutoData$horsepower)
## [1] 582.3257
MSE2(predict(pol.mod2),AutoData$horsepower)
## [1] 416.7871
MSE2(predict(step.mod2),AutoData$horsepower)
## [1] 686.2376
nilai_MSE <- rbind(MSE2(predict(lin.mod2),AutoData$horsepower),
                   MSE2(predict(pol.mod2),AutoData$horsepower),
                   MSE2(predict(step.mod2),AutoData$horsepower))
nama_model <- c("Linear","Poly (ordo=2)","Tangga (breaks=3)")
data.frame(nama_model,nilai_MSE)
## nama_model nilai_MSE
## 1 Linear 582.3257
## 2 Poly (ordo=2) 416.7871
## 3 Tangga (breaks=3) 686.2376
Comparing the MSE values of the three models, the order-2 polynomial again has the lowest MSE, 416.7871. We therefore conclude that the best model for these data is the order-2 polynomial.
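The MSE values above are in-sample (training) errors. Since rsample and purrr are already loaded, an out-of-sample check via 10-fold cross-validation can be sketched as follows; this is only an illustration of the idea, with an arbitrary seed, not part of the original analysis:
#10-fold cross-validation MSE for the order-2 polynomial model (sketch)
set.seed(123) #arbitrary seed, chosen only for reproducibility
folds <- rsample::vfold_cv(AutoData, v = 10)
cv.mse <- purrr::map_dbl(folds$splits, function(split) {
  train <- rsample::analysis(split)   #nine folds for fitting
  test  <- rsample::assessment(split) #held-out fold
  fit   <- lm(horsepower ~ mpg + I(mpg^2), data = train)
  mean((predict(fit, newdata = test) - test$horsepower)^2)
})
mean(cv.mse) #average out-of-sample MSE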