Afris Setiya Intan Amanda (G1401201018)

2022-10-21

Nonlinear Regression

Outline

Beberapa pemodelan dalam regresi nonlinier, yaitu:
1. Regresi Polinomial (Polynomial Regression)
2. Regresi Fungsi Tangga (Step Functions)
3. Basis Function Regression
4. Spline Regression

Beberapa metode pemulusan dalam regresi nonlinier, yaitu:
1. Piecewise Cubic Polynomial Function
2. Truncated Power Basis Function
3. Smoothing Spline
4. Natural Spline
5. LOESS Function

Packages yang digunakan dalam data yang dianalisis ini, sebagai berikut:

library(MultiKink)
library(tidyverse)
library(ggplot2)
library(dplyr)
library(purrr)
library(rsample)
library(mlr3measures)
library(splines)
library(ISLR)

Contoh Kuliah

Data Bangkitan & Plot

set.seed(123)
data.x <- rnorm(1000,1,1)
err <- rnorm(1000,0,5)
y <- 5+2*data.x+3*data.x^2+err
#Model polinomial ordo 2
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="purple")
abline(v=1, col="red", lty=2)

Pertemuan 8

Regresi Linear

#Model dengan nilai dugaan y
lin.mod <-lm( y~data.x)
plot( data.x,y,xlim=c( -2,5), ylim=c( -10,70))
lines(data.x,lin.mod$fitted.values,col="purple")

summary(lin.mod)
## 
## Call:
## lm(formula = y ~ data.x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.6384  -4.2679  -0.4316   3.8259  27.1070 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.4176     0.2919   15.14   <2e-16 ***
## data.x        8.7312     0.2056   42.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.445 on 998 degrees of freedom
## Multiple R-squared:  0.6437, Adjusted R-squared:  0.6434 
## F-statistic:  1803 on 1 and 998 DF,  p-value: < 2.2e-16

Regresi Polimonial

pol.mod <- lm( y~data.x+I(data.x^2))
ix <- sort( data.x,index.return=T)$ix #indeks data yang berurutan berdasarkan nilai
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70))
lines(data.x[ix], pol.mod$fitted.values[ix],col="blue", cex=2)

summary(pol.mod)
## 
## Call:
## lm(formula = y ~ data.x + I(data.x^2))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.1593  -3.4709   0.0247   3.5581  16.4277 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.7597     0.2284  20.841   <2e-16 ***
## data.x        2.5366     0.2930   8.656   <2e-16 ***
## I(data.x^2)   2.9540     0.1169  25.272   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.034 on 997 degrees of freedom
## Multiple R-squared:  0.7828, Adjusted R-squared:  0.7824 
## F-statistic:  1797 on 2 and 997 DF,  p-value: < 2.2e-16

Regresi Fungsi Tangga

range( data.x)
## [1] -1.809775  4.241040
#Fungsi tangga secara manual
c1 <- as.factor(ifelse(data.x<=0,1,0))
c2 <- as.factor(ifelse(data.x<=2 & data.x>0,1,0))
c3 <- as.factor(ifelse(data.x>2,1,0))
data.frame(y, c1, c2, c3)
##                y c1 c2 c3
## 1     1.47960005  0  1  0
## 2     3.11774990  0  1  0
## 3    29.66848014  0  0  1
## 4     9.91810577  0  1  0
## 5    -1.66226604  0  1  0
## 6    37.74773090  0  0  1
## 7    15.57328957  0  1  0
## 8    16.76168677  1  0  0
## 9     9.34646890  0  1  0
## 10    4.79575147  0  1  0
## 11   38.27473885  0  0  1
## 12   27.42803871  0  1  0
## 13    7.59446579  0  1  0
## 14   13.26737350  0  1  0
## 15    5.42391443  0  1  0
## 16   34.80973650  0  0  1
## 17   15.86408276  0  1  0
## 18   -0.43969045  1  0  0
## 19   18.51449542  0  1  0
## 20   15.63450070  0  1  0
## 21    4.05770273  1  0  0
## 22    7.58410632  0  1  0
## 23   11.94287983  1  0  0
## 24   10.25469864  0  1  0
## 25   -2.07076598  0  1  0
## 26    6.18404135  1  0  0
## 27   27.07569393  0  1  0
## 28   18.37393663  0  1  0
## 29    6.88072959  1  0  0
## 30   28.35277897  0  0  1
## 31    7.97265291  0  1  0
## 32    9.40128751  0  1  0
## 33   14.79251046  0  1  0
## 34   17.04833283  0  1  0
## 35   23.27565349  0  1  0
## 36   11.24733267  0  1  0
## 37   16.68640678  0  1  0
## 38   11.65786569  0  1  0
## 39    8.10769799  0  1  0
## 40   16.50145065  0  1  0
## 41    0.77846089  0  1  0
## 42   11.49700187  0  1  0
## 43    4.23586013  1  0  0
## 44   40.16059644  0  0  1
## 45   26.36166872  0  0  1
## 46   -0.30275295  1  0  0
## 47    0.69661531  0  1  0
## 48    4.44765445  0  1  0
## 49   26.82354345  0  1  0
## 50    9.63272255  0  1  0
## 51   13.87623091  0  1  0
## 52    8.82483752  0  1  0
## 53   12.01501363  0  1  0
## 54   21.80963719  0  0  1
## 55   14.13630206  0  1  0
## 56   31.95434042  0  0  1
## 57    0.77361920  1  0  0
## 58   15.97499593  0  1  0
## 59   14.61851167  0  1  0
## 60   14.65607974  0  1  0
## 61   20.87916435  0  1  0
## 62    3.67346016  0  1  0
## 63   13.24810549  0  1  0
## 64   10.14662442  1  0  0
## 65    4.05946383  1  0  0
## 66    7.82498458  0  1  0
## 67    8.74262829  0  1  0
## 68   12.72139695  0  1  0
## 69   19.57423792  0  1  0
## 70   47.90473247  0  0  1
## 71    9.47077529  0  1  0
## 72    5.66370731  1  0  0
## 73   15.95272689  0  0  1
## 74    2.92328269  0  1  0
## 75    7.63044060  0  1  0
## 76   19.10528763  0  0  1
## 77   10.53625354  0  1  0
## 78    3.03302324  1  0  0
## 79   11.02124115  0  1  0
## 80    5.29419317  0  1  0
## 81   19.57143109  0  1  0
## 82   15.19067483  0  1  0
## 83    8.60005453  0  1  0
## 84    7.94136372  0  1  0
## 85   11.68090998  0  1  0
## 86    7.86637558  0  1  0
## 87   17.92627170  0  0  1
## 88   18.64130657  0  1  0
## 89    5.44773828  0  1  0
## 90   14.40787638  0  0  1
## 91   29.75870113  0  1  0
## 92    3.40235872  0  1  0
## 93   14.94489005  0  1  0
## 94   11.24579576  0  1  0
## 95   23.28450551  0  0  1
## 96    8.50029328  0  1  0
## 97   44.04759275  0  0  1
## 98   34.51068677  0  0  1
## 99   10.70155804  0  1  0
## 100   3.72483350  1  0  0
## 101  10.41074024  0  1  0
## 102  16.25614918  0  1  0
## 103   3.52619045  0  1  0
## 104   0.57807961  0  1  0
## 105   5.90517285  0  1  0
## 106   8.27604882  0  1  0
## 107   0.64129376  0  1  0
## 108   5.42220882  1  0  0
## 109   0.79192183  0  1  0
## 110  20.69176893  0  1  0
## 111   3.26565474  0  1  0
## 112  20.75839780  0  1  0
## 113  17.03201741  1  0  0
## 114   4.98486945  0  1  0
## 115  20.25293002  0  1  0
## 116  16.80705558  0  1  0
## 117  10.52794081  0  1  0
## 118   3.83763263  0  1  0
## 119  13.24489619  0  1  0
## 120  -5.07380008  1  0  0
## 121   7.76672098  0  1  0
## 122  -2.07088970  0  1  0
## 123  13.77404746  0  1  0
## 124   7.19449493  0  1  0
## 125  32.32702133  0  0  1
## 126  21.97973918  0  1  0
## 127  11.79912675  0  1  0
## 128   8.42317392  0  1  0
## 129   6.57997773  0  1  0
## 130   1.60266674  0  1  0
## 131  30.26810163  0  0  1
## 132  13.74278415  0  1  0
## 133  12.67758945  0  1  0
## 134   2.24368288  0  1  0
## 135   1.10657548  1  0  0
## 136  19.42339606  0  0  1
## 137   0.87533982  1  0  0
## 138  24.05739688  0  1  0
## 139  44.10258568  0  0  1
## 140   3.91887733  1  0  0
## 141  15.29709564  0  1  0
## 142   6.46346850  0  1  0
## 143   5.18394089  1  0  0
## 144   5.24983424  1  0  0
## 145   6.33263690  1  0  0
## 146   2.86493832  0  1  0
## 147   0.48166155  1  0  0
## 148  22.90841096  0  1  0
## 149  37.28910740  0  0  1
## 150   6.18832701  1  0  0
## 151  17.87865559  0  1  0
## 152  13.13736887  0  1  0
## 153  15.94400584  0  1  0
## 154   5.84898165  1  0  0
## 155  16.08610270  0  1  0
## 156   8.57999987  0  1  0
## 157  13.79705912  0  1  0
## 158   8.82809640  0  1  0
## 159  14.75125984  0  1  0
## 160   3.24481533  0  1  0
## 161  24.29766227  0  0  1
## 162   3.24329661  1  0  0
## 163   4.35292703  1  0  0
## 164  66.86523051  0  0  1
## 165   3.93388695  0  1  0
## 166   2.55919653  0  1  0
## 167  18.05239509  0  1  0
## 168  10.64008361  0  1  0
## 169   8.49275430  0  1  0
## 170  20.77213428  0  1  0
## 171  10.34189635  0  1  0
## 172  17.24333528  0  1  0
## 173   4.94509135  0  1  0
## 174  41.45244409  0  0  1
## 175   5.21797906  0  1  0
## 176   8.67819048  1  0  0
## 177   7.42729324  0  1  0
## 178  12.72255252  0  1  0
## 179   5.17055033  0  1  0
## 180   3.07526646  0  1  0
## 181   5.51054775  1  0  0
## 182  21.36078431  0  0  1
## 183   7.35171565  0  1  0
## 184   2.98360478  0  1  0
## 185  11.31189823  0  1  0
## 186  14.38166939  0  1  0
## 187  18.46262440  0  0  1
## 188   9.16425677  0  1  0
## 189  24.93702791  0  1  0
## 190  -4.24107485  0  1  0
## 191  10.25433389  0  1  0
## 192  18.04229695  0  1  0
## 193  21.75145670  0  1  0
## 194   6.02509638  0  1  0
## 195   0.35014496  1  0  0
## 196  38.77157809  0  0  1
## 197  12.62435188  0  1  0
## 198  11.95095558  1  0  0
## 199   2.19883072  0  1  0
## 200   6.59680639  1  0  0
## 201  45.19403401  0  0  1
## 202  21.87903647  0  0  1
## 203  12.34736866  0  1  0
## 204  11.49108198  0  1  0
## 205  10.35151234  0  1  0
## 206  12.35376632  0  1  0
## 207   0.61464599  0  1  0
## 208  11.84374615  0  1  0
## 209  28.93608178  0  0  1
## 210  11.04829906  0  1  0
## 211  12.00580753  0  1  0
## 212   9.99166820  0  1  0
## 213  25.75731175  0  0  1
## 214   0.51830036  0  1  0
## 215   4.33446977  0  1  0
## 216  35.95841025  0  0  1
## 217  -3.81605403  0  1  0
## 218  -1.65568551  0  1  0
## 219  -1.11475907  1  0  0
## 220  -3.27169079  1  0  0
## 221   8.49446403  0  1  0
## 222  11.12496420  0  1  0
## 223  11.75133836  0  0  1
## 224  13.97490684  0  1  0
## 225   5.53430526  0  1  0
## 226  14.77263703  0  1  0
## 227   0.33383614  0  1  0
## 228  11.61190666  0  1  0
## 229  21.41683709  0  1  0
## 230   6.78130502  1  0  0
## 231  32.84924685  0  0  1
## 232  19.07025553  0  1  0
## 233  11.03303718  0  1  0
## 234  -3.39644005  0  1  0
## 235   5.37537523  0  1  0
## 236  -5.00475493  1  0  0
## 237   7.08443141  0  1  0
## 238  11.76736299  0  1  0
## 239  16.31910313  0  1  0
## 240  10.62758687  0  1  0
## 241   1.92627325  0  1  0
## 242  10.76956525  0  1  0
## 243  35.80469349  0  0  1
## 244   0.86122959  1  0  0
## 245   5.40174383  0  1  0
## 246  39.32972789  0  0  1
## 247  10.13802855  0  1  0
## 248   7.41264939  1  0  0
## 249  13.03102120  0  1  0
## 250  16.52610960  0  1  0
## 251  12.67691593  0  1  0
## 252   9.56663155  0  1  0
## 253   9.77160127  0  1  0
## 254  12.67896430  0  1  0
## 255  36.91037757  0  0  1
## 256   4.30371107  0  1  0
## 257  16.62486498  0  0  1
## 258  19.19931521  0  1  0
## 259   8.53117459  0  1  0
## 260   5.15617569  1  0  0
## 261  10.35213812  0  1  0
## 262  10.56743080  0  1  0
## 263   9.07055357  0  1  0
## 264  23.91041988  0  0  1
## 265  44.48725838  0  0  1
## 266  34.88075882  0  0  1
## 267  11.11808226  0  1  0
## 268  12.36898386  1  0  0
## 269   7.30502608  0  1  0
## 270  16.36586960  0  1  0
## 271  23.31725652  0  1  0
## 272  23.54002132  0  1  0
## 273  18.95281715  0  1  0
## 274   3.27876475  1  0  0
## 275  18.41763673  0  1  0
## 276   8.17275979  0  1  0
## 277  11.73453401  0  1  0
## 278  15.32920539  0  1  0
## 279  13.42872884  0  1  0
## 280   9.84734182  0  1  0
## 281   2.58002338  1  0  0
## 282  16.82756499  0  1  0
## 283  13.19143387  0  1  0
## 284  -3.48217951  0  1  0
## 285   4.16287198  0  1  0
## 286  10.76377369  0  1  0
## 287  10.58828569  0  1  0
## 288  25.19955111  0  0  1
## 289  -1.56614471  0  1  0
## 290   9.65741489  0  1  0
## 291  26.70991244  0  0  1
## 292  30.63353424  0  0  1
## 293  22.90470369  0  0  1
## 294  13.84658807  0  1  0
## 295  38.46478389  0  0  1
## 296  11.12461451  0  1  0
## 297  37.01434875  0  0  1
## 298   0.31470649  1  0  0
## 299   9.91059856  0  1  0
## 300  29.22841437  0  0  1
## 301   2.06414773  0  1  0
## 302   4.07007995  0  1  0
## 303  -0.60459869  0  1  0
## 304   6.67500736  1  0  0
## 305   9.20004802  0  1  0
## 306  16.22020907  0  1  0
## 307  -0.04160235  1  0  0
## 308  12.36682575  0  1  0
## 309  19.76120742  0  0  1
## 310  38.75379254  0  0  1
## 311  32.20170443  0  0  1
## 312  15.25469581  0  1  0
## 313   8.71436703  1  0  0
## 314   2.52503437  0  1  0
## 315   5.16279742  0  1  0
## 316  19.30663762  0  1  0
## 317   5.79256727  0  1  0
## 318  -3.83142409  1  0  0
## 319  38.31329880  0  0  1
## 320  21.58481878  0  1  0
## 321   9.15036438  0  1  0
## 322  14.22584071  0  0  1
## 323  14.17780434  1  0  0
## 324  33.54846689  0  1  0
## 325   7.67441509  0  1  0
## 326  21.12152123  0  1  0
## 327  15.64732385  0  1  0
## 328  12.75658133  0  1  0
## 329  24.27898167  0  0  1
## 330   1.42287498  0  1  0
## 331  17.56385467  0  0  1
## 332   7.84170300  1  0  0
## 333   4.33572344  0  1  0
## 334  28.00675138  0  0  1
## 335  12.88357107  0  1  0
## 336  -3.10126196  1  0  0
## 337   8.85764457  1  0  0
## 338   1.29722648  0  1  0
## 339  18.13210635  0  1  0
## 340   7.02325724  0  1  0
## 341  15.06936069  0  1  0
## 342  27.73094669  0  1  0
## 343  28.83541176  0  0  1
## 344   6.54205951  0  1  0
## 345   1.99398113  0  1  0
## 346   1.16713443  1  0  0
## 347   4.99329513  0  1  0
## 348   8.44596798  0  1  0
## 349   0.73898611  0  1  0
## 350  10.17805908  0  1  0
## 351  20.47773687  0  0  1
## 352  -1.19665919  1  0  0
## 353   3.17642954  0  1  0
## 354  15.39947331  0  1  0
## 355   9.76317916  0  1  0
## 356  23.03356344  0  1  0
## 357  13.78136156  0  1  0
## 358   2.25985082  0  1  0
## 359   4.67269102  1  0  0
## 360  52.45129868  0  0  1
## 361  17.98473097  0  1  0
## 362  17.03174994  0  1  0
## 363  18.11691033  0  1  0
## 364  25.18765812  0  0  1
## 365  12.70651837  0  1  0
## 366   7.59813089  0  1  0
## 367  19.98068245  0  1  0
## 368   9.49860347  0  1  0
## 369  21.37731593  0  1  0
## 370   9.33493357  0  1  0
## 371  44.39954066  0  0  1
## 372  -1.62743036  1  0  0
## 373  13.41108350  0  1  0
## 374  11.54569720  0  1  0
## 375  10.16728642  0  1  0
## 376   9.47144034  0  1  0
## 377  -1.30450314  0  1  0
## 378   8.45907326  0  1  0
## 379   3.97215727  0  1  0
## 380   8.39638862  1  0  0
## 381  12.51155495  0  1  0
## 382  13.73982871  0  1  0
## 383  13.70172298  0  1  0
## 384   7.68527527  1  0  0
## 385  17.68595738  0  1  0
## 386  16.88662018  0  0  1
## 387  13.53026439  0  1  0
## 388   5.34629482  1  0  0
## 389  10.70757818  0  1  0
## 390   7.19076172  0  1  0
## 391   3.58163815  0  1  0
## 392   1.33692749  1  0  0
## 393  24.70590616  0  1  0
## 394  11.82364135  0  1  0
## 395  10.33681561  0  1  0
## 396  22.70433908  0  0  1
## 397   7.86450226  0  1  0
## 398  24.66388377  0  1  0
## 399   5.52220584  1  0  0
## 400   4.71021506  0  1  0
## 401   3.99719211  0  1  0
## 402   1.42151308  1  0  0
## 403   9.70497174  0  1  0
## 404   7.61345758  0  1  0
## 405  17.85314170  0  1  0
## 406  11.44326831  1  0  0
## 407  10.46062487  0  1  0
## 408  24.59106752  0  1  0
## 409  -1.94742636  0  1  0
## 410  10.56993883  0  1  0
## 411  14.99003555  0  1  0
## 412  15.25083337  0  1  0
## 413  10.66026512  0  1  0
## 414  13.09442724  0  1  0
## 415   8.74092305  0  1  0
## 416  11.13254760  1  0  0
## 417  11.82480991  0  1  0
## 418  13.41592013  0  1  0
## 419  19.77087698  0  1  0
## 420   9.72392420  0  1  0
## 421  32.98486823  0  0  1
## 422  17.73939730  0  1  0
## 423  12.52118738  0  1  0
## 424  28.37787951  0  0  1
## 425   2.82996315  0  1  0
## 426  14.90786482  0  1  0
## 427  46.40301030  0  0  1
## 428  14.32861892  0  1  0
## 429  30.57360210  0  0  1
## 430   3.30170141  1  0  0
## 431  12.50694677  0  1  0
## 432   5.53392196  0  1  0
## 433  15.06220849  0  1  0
## 434   6.95665465  1  0  0
## 435  -3.32145989  0  1  0
## 436   6.70502721  1  0  0
## 437   6.38337595  0  1  0
## 438  25.50743359  0  0  1
## 439   4.78224051  1  0  0
## 440  20.99167857  0  0  1
## 441   4.65798684  1  0  0
## 442  21.12385964  0  1  0
## 443  13.40902019  0  1  0
## 444  11.31126568  0  1  0
## 445   7.79339374  0  1  0
## 446   5.50696814  0  1  0
## 447   5.38312508  0  1  0
## 448   9.44416532  0  1  0
## 449   8.97393647  1  0  0
## 450   4.76846687  1  0  0
## 451  36.83422982  0  0  1
## 452  24.52770215  0  0  1
## 453  18.29922045  0  1  0
## 454  23.92778565  0  1  0
## 455  17.35743334  0  1  0
## 456  12.50463767  1  0  0
## 457  26.92432882  0  0  1
## 458  13.67249657  0  1  0
## 459  15.81774274  0  1  0
## 460  10.00582784  0  1  0
## 461  14.91556804  0  1  0
## 462  11.22432136  0  1  0
## 463  17.45589795  0  1  0
## 464  10.50367570  0  1  0
## 465   6.95847921  1  0  0
## 466  23.06327017  0  0  1
## 467  17.18966908  0  1  0
## 468  36.15015929  0  0  1
## 469  11.39212410  0  1  0
## 470  18.64637263  0  0  1
## 471  36.05939815  0  0  1
## 472   1.39021100  0  1  0
## 473  -0.16816948  1  0  0
## 474   7.53399963  0  1  0
## 475  -0.56555174  0  1  0
## 476   2.83975259  0  1  0
## 477  38.00723082  0  0  1
## 478   4.82686375  0  1  0
## 479   4.14297602  0  1  0
## 480   7.76973138  0  1  0
## 481  16.77020670  0  1  0
## 482  16.11962944  0  1  0
## 483  28.12509640  0  0  1
## 484  16.93343103  0  1  0
## 485  13.36966387  0  1  0
## 486  21.69919503  0  1  0
## 487  16.89318850  0  1  0
## 488   6.39845182  0  1  0
## 489  42.02760377  0  0  1
## 490  12.23490218  0  1  0
## 491  13.02733610  0  1  0
## 492  21.60483405  0  0  1
## 493  21.44545112  0  0  1
## 494  16.26489427  1  0  0
## 495  -0.16431291  0  1  0
## 496   5.74090060  1  0  0
## 497  16.01683804  0  1  0
## 498  16.49403434  0  1  0
## 499  18.75621607  0  1  0
## 500  14.51625119  0  1  0
## 501   2.16674874  0  1  0
## 502   3.47643578  0  1  0
## 503  16.86665306  0  0  1
## 504  20.83611338  0  1  0
## 505  10.36119376  1  0  0
## 506  19.90204727  0  1  0
## 507   7.07115684  0  1  0
## 508   1.92411454  1  0  0
## 509  16.39094353  0  1  0
## 510  13.90892428  0  1  0
## 511   8.05824470  0  1  0
## 512   4.08011158  0  1  0
## 513  23.20250509  0  1  0
## 514  17.41731302  0  1  0
## 515   4.73295355  0  1  0
## 516   8.42260990  0  1  0
## 517   7.61824121  0  1  0
## 518  16.18004371  0  1  0
## 519   4.33890931  1  0  0
## 520   6.90405901  0  1  0
## 521  21.10305843  0  1  0
## 522   7.75190643  0  1  0
## 523   2.92092312  0  1  0
## 524   6.44119120  0  1  0
## 525  22.60375174  0  0  1
## 526  16.34547247  0  1  0
## 527  32.14362561  0  0  1
## 528   4.38083834  0  1  0
## 529  12.78908248  0  1  0
## 530   9.62558271  0  1  0
## 531  24.75249081  0  1  0
## 532   8.84852444  0  1  0
## 533   4.62072794  1  0  0
## 534   9.53530578  0  1  0
## 535  38.99736145  0  0  1
## 536  23.27699566  0  1  0
## 537  22.47618196  0  0  1
## 538  14.07598511  0  1  0
## 539   7.32724981  0  1  0
## 540  12.15116542  0  1  0
## 541  13.59491076  0  1  0
## 542   8.30524430  0  1  0
## 543  16.74003960  0  1  0
## 544  11.69544571  1  0  0
## 545  20.00537047  0  1  0
## 546  27.04275834  0  1  0
## 547   4.41149452  1  0  0
## 548  19.41240085  0  1  0
## 549  39.42937499  0  0  1
## 550   5.91178362  0  1  0
## 551  18.79185076  0  1  0
## 552   1.78617733  0  1  0
## 553  17.40726649  0  0  1
## 554  16.92663144  0  1  0
## 555  35.97259019  0  0  1
## 556  -1.77963904  1  0  0
## 557  11.71940531  0  1  0
## 558   1.05482241  0  1  0
## 559   3.28325520  0  1  0
## 560   8.77954048  0  1  0
## 561   1.98501671  0  1  0
## 562  20.60193767  0  1  0
## 563   5.04046771  1  0  0
## 564  31.16003575  0  0  1
## 565   2.11347202  1  0  0
## 566   5.08317752  0  1  0
## 567  19.68990824  0  1  0
## 568  16.91701524  0  1  0
## 569   6.66384104  0  1  0
## 570  11.00115430  0  1  0
## 571  27.09174124  0  1  0
## 572   1.22131839  1  0  0
## 573   1.83239102  0  1  0
## 574   9.77311618  0  1  0
## 575  -0.88971914  0  1  0
## 576  30.68890547  0  0  1
## 577  17.70769143  0  1  0
## 578  11.62881801  0  1  0
## 579   4.54120107  1  0  0
## 580   7.66004326  0  1  0
## 581   3.65193699  0  1  0
## 582  13.47193433  0  1  0
## 583  -2.39515056  1  0  0
## 584  16.93673859  0  1  0
## 585   0.96358168  1  0  0
## 586   9.75530438  0  1  0
## 587   9.78441101  0  1  0
## 588   2.95685625  0  1  0
## 589  24.53039777  0  1  0
## 590   8.00892879  1  0  0
## 591   0.48181869  1  0  0
## 592  15.00039170  0  1  0
## 593  16.58477554  0  1  0
## 594   2.27522849  0  1  0
## 595  15.84436933  0  1  0
## 596   5.41092442  1  0  0
## 597   7.38877859  0  1  0
## 598  15.63985164  1  0  0
## 599  12.39156169  0  0  1
## 600  39.44818907  0  0  1
## 601  20.60748874  0  0  1
## 602  13.06603489  0  1  0
## 603   7.46670149  0  1  0
## 604   1.79751932  1  0  0
## 605   9.64531139  0  1  0
## 606   7.40011110  0  1  0
## 607  18.43371941  0  1  0
## 608   9.63375522  1  0  0
## 609  16.24933347  0  1  0
## 610   9.94505194  0  1  0
## 611  13.01796908  0  1  0
## 612   9.22123136  1  0  0
## 613  34.42155281  0  0  1
## 614   6.96010415  0  1  0
## 615  19.40140769  0  0  1
## 616   1.48872618  1  0  0
## 617   6.96149361  0  1  0
## 618  10.46174903  0  1  0
## 619   9.86284867  1  0  0
## 620  30.02453638  0  0  1
## 621   1.94867529  1  0  0
## 622  29.30382637  0  1  0
## 623  22.60428110  0  1  0
## 624  18.62365326  0  1  0
## 625   3.60776943  0  1  0
## 626  19.99691755  0  1  0
## 627  13.69504102  0  1  0
## 628  13.26771170  1  0  0
## 629   3.11827537  1  0  0
## 630  44.02498676  0  0  1
## 631  -4.52099878  0  1  0
## 632   2.94859309  1  0  0
## 633   8.65912616  0  1  0
## 634  18.33738407  0  1  0
## 635   3.84566210  1  0  0
## 636   3.82971716  1  0  0
## 637  15.95565792  0  1  0
## 638  28.59120635  0  0  1
## 639  11.81284763  0  1  0
## 640   8.92479496  0  1  0
## 641  -0.72811868  0  1  0
## 642  10.01089049  0  1  0
## 643  19.41599519  0  1  0
## 644  23.47794438  0  1  0
## 645  12.84867823  0  1  0
## 646  16.76682613  0  1  0
## 647  -3.98290619  0  1  0
## 648   1.80493088  0  1  0
## 649  22.33571729  0  1  0
## 650  19.40706480  0  1  0
## 651  -5.04767691  1  0  0
## 652   9.99105517  0  1  0
## 653  23.03326385  0  1  0
## 654   7.92542226  1  0  0
## 655  10.43171498  0  1  0
## 656  -2.29735956  0  1  0
## 657  17.15203660  0  0  1
## 658  15.35038331  0  1  0
## 659  22.36711116  0  1  0
## 660   1.30977333  0  1  0
## 661  24.26286210  0  1  0
## 662  26.78886595  0  0  1
## 663  40.17727275  0  0  1
## 664   1.50898058  0  1  0
## 665   3.84901180  1  0  0
## 666   8.01401876  0  1  0
## 667  16.99071264  0  1  0
## 668  13.97050399  0  1  0
## 669  17.69061713  0  1  0
## 670  29.17089447  0  0  1
## 671   4.08400195  0  1  0
## 672  12.96939954  0  1  0
## 673  13.47787279  0  1  0
## 674   3.16973808  0  1  0
## 675  15.37202847  0  1  0
## 676  12.75673097  1  0  0
## 677   9.19458047  1  0  0
## 678   2.11974273  1  0  0
## 679   3.00809250  0  1  0
## 680  12.78395652  0  1  0
## 681  15.26104484  0  1  0
## 682  24.89841177  0  1  0
## 683   4.07468224  0  1  0
## 684  11.31152669  0  1  0
## 685   0.87806044  1  0  0
## 686  10.32569923  0  1  0
## 687   4.17329731  0  1  0
## 688   2.25772294  0  1  0
## 689   1.02436267  0  1  0
## 690   7.01391477  1  0  0
## 691  13.70822022  0  1  0
## 692  14.96922164  0  0  1
## 693  17.53542855  0  0  1
## 694  14.37841312  0  1  0
## 695   2.63512759  1  0  0
## 696  50.31916874  0  0  1
## 697   6.14509052  0  1  0
## 698   7.75596581  0  1  0
## 699  14.33146249  0  1  0
## 700   4.95786366  1  0  0
## 701   4.80235528  0  1  0
## 702   2.44636674  1  0  0
## 703  -9.34292093  0  1  0
## 704  20.33644853  0  1  0
## 705  13.62174109  1  0  0
## 706  10.25869141  0  1  0
## 707  11.72537452  0  1  0
## 708  12.09547076  0  1  0
## 709  16.95571859  0  1  0
## 710  24.04653367  0  1  0
## 711  16.50779684  0  0  1
## 712  21.31946742  0  1  0
## 713   3.60990940  0  1  0
## 714  13.09842665  0  1  0
## 715  28.47150993  0  0  1
## 716  20.77938384  0  0  1
## 717  16.86681148  0  1  0
## 718   2.99198374  0  1  0
## 719  21.51715796  0  1  0
## 720  14.16181806  0  1  0
## 721   6.21795315  1  0  0
## 722   6.18283754  1  0  0
## 723  14.49462381  0  1  0
## 724  -0.88821837  1  0  0
## 725  11.28210956  0  1  0
## 726   1.72165724  0  1  0
## 727   8.37194371  0  1  0
## 728   6.24107332  0  1  0
## 729   0.35499934  0  1  0
## 730  21.34562379  0  1  0
## 731  40.97005000  0  0  1
## 732  11.80223803  0  1  0
## 733   7.95250937  1  0  0
## 734  31.57077375  0  0  1
## 735   4.61331823  0  1  0
## 736  14.76192223  0  1  0
## 737  20.25509644  0  1  0
## 738   7.97701049  0  1  0
## 739   8.31146633  0  1  0
## 740  22.80793391  0  0  1
## 741  16.91188100  0  1  0
## 742  18.08569147  0  1  0
## 743  45.40535737  0  0  1
## 744  26.05387119  0  1  0
## 745  16.15641349  0  1  0
## 746   2.40264600  1  0  0
## 747  47.04509674  0  0  1
## 748   5.03760497  0  1  0
## 749  52.55984613  0  0  1
## 750  14.88161755  0  1  0
## 751  25.90021784  0  0  1
## 752  13.56959933  0  1  0
## 753  14.20972109  0  1  0
## 754   6.24560539  0  1  0
## 755  10.92091942  0  1  0
## 756  16.70104747  0  1  0
## 757  23.35251635  0  0  1
## 758   9.47009283  0  1  0
## 759   2.87377425  1  0  0
## 760  11.38999646  0  1  0
## 761   9.83959386  0  1  0
## 762   7.80595356  0  1  0
## 763  15.85624632  0  1  0
## 764   4.87984297  1  0  0
## 765  25.18312624  0  1  0
## 766  14.47139149  0  1  0
## 767  25.95612046  0  0  1
## 768  12.37739802  0  1  0
## 769  12.98866804  0  1  0
## 770  28.49006447  0  0  1
## 771   1.28624682  1  0  0
## 772   6.74890121  0  1  0
## 773  23.98629126  0  1  0
## 774   7.32308079  1  0  0
## 775   2.24921276  0  1  0
## 776  42.71932766  0  0  1
## 777  12.67007754  0  1  0
## 778   7.83711887  0  1  0
## 779  14.38659902  0  1  0
## 780   7.25211786  0  1  0
## 781  19.77486090  0  1  0
## 782  17.17015958  0  1  0
## 783  11.40661128  0  1  0
## 784   0.08349592  1  0  0
## 785   5.66808622  1  0  0
## 786  11.84514581  0  1  0
## 787  20.83252156  0  1  0
## 788  11.00680950  1  0  0
## 789   0.12972273  1  0  0
## 790   6.32600950  0  1  0
## 791  16.70805219  0  1  0
## 792  -0.14474395  0  1  0
## 793  27.28203555  0  1  0
## 794   7.82632632  0  1  0
## 795   9.54483023  0  1  0
## 796  19.32122530  0  1  0
## 797  14.16775531  0  1  0
## 798   5.97697101  0  1  0
## 799  14.25884575  0  1  0
## 800  11.28210505  0  1  0
## 801   6.78425928  0  1  0
## 802   2.76200745  0  1  0
## 803  18.74910956  0  1  0
## 804  29.49501473  0  0  1
## 805  20.37645004  0  1  0
## 806  12.81254307  0  1  0
## 807  11.32011493  0  1  0
## 808  40.08825773  0  0  1
## 809  12.67984050  0  1  0
## 810   1.78608946  0  1  0
## 811  10.25528133  1  0  0
## 812   2.15029898  1  0  0
## 813  11.07010764  0  1  0
## 814  10.04811736  0  1  0
## 815   9.13687607  0  1  0
## 816  17.50111753  0  1  0
## 817   6.72534165  0  1  0
## 818  29.61527921  0  0  1
## 819   6.22148752  0  1  0
## 820   6.87591679  1  0  0
## 821  -1.50493628  1  0  0
## 822  25.07549936  0  1  0
## 823  10.15411982  0  1  0
## 824  13.90857341  0  1  0
## 825   9.61858204  0  1  0
## 826  10.66804211  0  1  0
## 827   0.54184457  0  1  0
## 828  13.92616755  1  0  0
## 829   3.95692396  1  0  0
## 830  10.30796850  1  0  0
## 831  22.13067113  0  0  1
## 832   4.62965476  0  1  0
## 833   9.01081917  0  1  0
## 834  23.53029496  0  0  1
## 835   8.36943276  0  1  0
## 836  16.65205568  0  1  0
## 837  -4.21822105  1  0  0
## 838  12.33850796  0  1  0
## 839  17.45415144  0  1  0
## 840   6.94447287  0  1  0
## 841   8.01637437  0  1  0
## 842  47.93607946  0  0  1
## 843   6.33724355  0  1  0
## 844  15.16964385  0  1  0
## 845  20.80672159  0  1  0
## 846   5.54800881  0  1  0
## 847   4.16081558  0  1  0
## 848  16.75961925  0  0  1
## 849  32.45204096  0  0  1
## 850   1.96971761  0  1  0
## 851   2.29994526  0  1  0
## 852  16.59822901  0  1  0
## 853  32.59854994  0  0  1
## 854   2.48566228  1  0  0
## 855  48.53157077  0  0  1
## 856   6.22698600  0  1  0
## 857  12.08549853  1  0  0
## 858  10.33280701  0  1  0
## 859  12.25655784  0  1  0
## 860  21.90172488  0  0  1
## 861  25.19536504  0  1  0
## 862   9.44628260  0  1  0
## 863  -2.34132523  1  0  0
## 864   4.31277884  0  1  0
## 865   8.13981565  0  1  0
## 866  55.27407028  0  0  1
## 867  -0.33746087  1  0  0
## 868  20.09807295  0  0  1
## 869  14.05665347  0  1  0
## 870  38.44783308  0  0  1
## 871  10.59580189  1  0  0
## 872  19.51848850  0  1  0
## 873   5.85078419  1  0  0
## 874  16.76773877  0  1  0
## 875  31.25106093  0  0  1
## 876  42.31532101  0  0  1
## 877  30.35747205  0  1  0
## 878  25.11410983  0  0  1
## 879  25.99828746  0  0  1
## 880  12.27114660  0  1  0
## 881  14.22599821  0  1  0
## 882  -2.73074669  0  1  0
## 883  12.32878664  0  1  0
## 884  20.34453809  0  0  1
## 885  17.73854621  0  1  0
## 886  18.82414437  0  1  0
## 887  11.34788490  1  0  0
## 888  -1.52911609  0  1  0
## 889  11.04572820  0  1  0
## 890   9.84623665  0  1  0
## 891  -1.42584231  0  1  0
## 892  22.01560602  0  1  0
## 893  10.53912539  1  0  0
## 894  33.44589824  0  0  1
## 895  18.11886750  0  1  0
## 896   1.56456059  0  1  0
## 897  11.34751899  0  1  0
## 898   7.08442801  0  1  0
## 899  11.97873583  0  1  0
## 900  15.79667999  0  1  0
## 901  12.65900696  1  0  0
## 902   3.26916662  0  1  0
## 903  12.50272634  0  1  0
## 904  39.35567549  0  0  1
## 905  20.56090838  0  0  1
## 906  -6.84647705  0  1  0
## 907  21.22374806  0  1  0
## 908   5.18618978  0  1  0
## 909   7.69018693  0  1  0
## 910  19.39034031  0  1  0
## 911  62.74241260  0  0  1
## 912  -0.46649450  1  0  0
## 913  15.99303923  0  1  0
## 914   4.79439994  1  0  0
## 915  16.47144393  0  1  0
## 916  28.49170170  0  0  1
## 917  28.90348847  0  0  1
## 918   1.46056394  1  0  0
## 919  11.46124758  0  1  0
## 920  33.79411929  0  0  1
## 921  -4.47763616  0  1  0
## 922  13.91404120  0  1  0
## 923   1.87595663  0  1  0
## 924  13.42952139  0  1  0
## 925  15.69679710  0  1  0
## 926  32.21787014  0  0  1
## 927  10.20769949  0  1  0
## 928  12.19549525  0  1  0
## 929  51.96506237  0  0  1
## 930   9.87125750  0  1  0
## 931  10.63741380  0  1  0
## 932  10.55482689  1  0  0
## 933  13.44024096  0  1  0
## 934   1.99886304  0  1  0
## 935  13.10178123  0  1  0
## 936  -0.49797287  0  1  0
## 937   2.92906798  0  1  0
## 938  11.84695675  0  1  0
## 939  -2.83322544  0  1  0
## 940  17.66535106  0  0  1
## 941  28.20130463  0  0  1
## 942   3.43498139  0  1  0
## 943  11.25814633  1  0  0
## 944   4.54204507  0  1  0
## 945   9.10542824  0  1  0
## 946  14.92772099  0  1  0
## 947   2.76022765  0  1  0
## 948  36.61079310  0  0  1
## 949   5.96876271  0  1  0
## 950  18.78984915  1  0  0
## 951   9.56050594  0  1  0
## 952   7.35333371  1  0  0
## 953   9.61863177  0  1  0
## 954  10.82386970  0  1  0
## 955  20.11780189  0  1  0
## 956   3.22198822  1  0  0
## 957  12.01251226  0  1  0
## 958  14.65754922  0  1  0
## 959   2.53116135  1  0  0
## 960  30.17641642  0  1  0
## 961  10.97584087  0  1  0
## 962  54.31816741  0  0  1
## 963   9.56933193  0  1  0
## 964  17.16601852  0  1  0
## 965   0.80478609  0  1  0
## 966  15.35313494  0  1  0
## 967   9.84492893  1  0  0
## 968  -1.34710439  1  0  0
## 969   5.55610575  0  1  0
## 970   7.33200309  0  1  0
## 971  16.16605761  0  1  0
## 972   2.65193888  1  0  0
## 973   3.62807334  1  0  0
## 974  12.68466446  1  0  0
## 975  15.16866201  0  1  0
## 976   9.57109073  1  0  0
## 977   7.76539700  0  1  0
## 978   5.12905739  0  1  0
## 979   4.39118554  0  1  0
## 980  15.15203895  0  1  0
## 981  45.62522524  0  0  1
## 982  11.92286227  1  0  0
## 983   4.82725817  0  1  0
## 984  -1.90539309  0  1  0
## 985  34.20031783  0  0  1
## 986   2.57733366  0  1  0
## 987   9.96522218  0  1  0
## 988  26.75101596  0  1  0
## 989   3.64104722  1  0  0
## 990  39.20800411  0  0  1
## 991  11.29036560  0  1  0
## 992   9.49566282  1  0  0
## 993  15.49054149  0  1  0
## 994   4.74618497  0  1  0
## 995   6.40012227  0  1  0
## 996   9.68770335  0  1  0
## 997  23.27796586  0  0  1
## 998   6.05484776  1  0  0
## 999   9.32273117  0  1  0
## 1000  5.89033473  0  1  0
step.mod <- lm(y~c1+c2+c3)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70))
lines(data.x,lin.mod$fitted.values,col="purple")
lines(data.x[ix], step.mod$fitted.values[ix],col="red")

summary(step.mod)
## 
## Call:
## lm(formula = y ~ c1 + c2 + c3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.493  -4.885  -0.375   4.734  35.863 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  31.0026     0.5833   53.15   <2e-16 ***
## c11         -25.7742     0.8149  -31.63   <2e-16 ***
## c21         -19.8529     0.6474  -30.67   <2e-16 ***
## c31               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.309 on 997 degrees of freedom
## Multiple R-squared:  0.5422, Adjusted R-squared:  0.5413 
## F-statistic: 590.4 on 2 and 997 DF,  p-value: < 2.2e-16

Perbandingan Model

nilai_AIC <- rbind(AIC(lin.mod),
                   AIC(pol.mod),
                   AIC(step.mod))

nama_model <- c("Linear","Poly (ordo=2)","Tangga (breaks=3)")
data.frame(nama_model,nilai_AIC)
##          nama_model nilai_AIC
## 1            Linear  6568.395
## 2     Poly (ordo=2)  6075.346
## 3 Tangga (breaks=3)  6821.116
MSE= function(pred,actual)
{
  mean((pred-actual)^2)
}
MSE(predict(lin.mod), y)
## [1] 41.45124
MSE(predict(pol.mod), y)
## [1] 25.26623
MSE(predict(step.mod), y)
## [1] 53.26283

Berdasarkan nilai AIC dan MSE terkecil, model regresi polinomial dengan ordo 2 merupakan model terbaik dibandingkan model lainnya.

Pertemuan 9

Piecewise Cubic Polynomial

dt.all <- cbind(y,data.x)

#Knots=1
##Jumlah amatan data yang >=1
dt1 <- dt.all[data.x <=1,]
dim(dt1)
## [1] 495   2
##Jumlah amatan data yang >1
dt2 <- dt.all[data.x >1,]
dim(dt2)
## [1] 505   2
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16, col="orange")

cub.mod1 <- lm(dt1[,1]~dt1[,2]+I(dt1[,2]^2)+I(dt1[,2]^3))
ix <- sort(dt1[,2], index.return=T)$ix
lines(dt1[ix,2],cub.mod1$fitted.values[ix],col="blue", lwd=2)

cub.mod2 <- lm(dt2[,1]~dt2[,2]+I(dt2[,2]^2)+I(dt2[,2]^3))
ix <- sort(dt2[,2], index.return=T)$ix
lines(dt2[ix,2],cub.mod2$fitted.values[ix],col="purple", lwd=2)

Truncated Power Basic

plot(data.x,y,xlim=c( -2,5), ylim=c( -10,70), pch=16,col="orange")
abline(v=1,col="red", lty=2)

hx <- ifelse( data.x>1,(data.x-1)^3,0)

cubspline.mod <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix], cubspline.mod$fitted.values[ix],col="purple", lwd=2)

Perbandingan dengan k-fold CV

plot( data.x,y,xlim=c(-2,5), ylim=c( -10,70), pch=16,col="orange")
abline(v=0,col="red", lty=2)
abline(v=2,col="red", lty=2)

hx1 <- ifelse( data.x>0,(data.x-0)^3,0)
hx2 <- ifelse( data.x>2,(data.x-2)^3,0)

cubspline.mod2 <- lm( y~data.x+I(data.x^2)+I(data.x^3)+hx1+hx2)
ix <- sort( data.x,index.return=T)$ix
lines( data.x[ix],cubspline.mod2$fitted.values[ix],col="blue", lwd=2)

Perbandingan 1 Knots dan 2 Knots dengan 5-fold CV

##1 knots
data.all <- cbind( y,data.x,hx)
set.seed(456)
ind <- sample(1:5,length( data.x),replace=T)
res <- c()
for( i in 1:5){
  dt.train <- data.all[ ind!=i,]
  x.test <- data.all[ ind==i, -1]
  y.test <- data.all[ ind==i,1]
  mod1 <- lm( dt.train[,1]~dt.train[,2]+I( dt.train[,2]^2)+I( dt.train[,2]^3)+dt.train[,3])
  x.test.olah <- cbind(1,x.test[,1], x.test[,1]^2,x.test[,1]^3,x.test[,2])
  beta <- coefficients(mod1)
  prediksi <- x.test.olah%*%beta
  res <- c( res,mean(( y.test-prediksi)^2))
}
res
## [1] 22.08767 21.39598 29.47677 29.68683 25.18070
mean(res)
## [1] 25.56559
##2 knots (knots = 0,2)
data.all2 <- cbind(y,data.x,hx1,hx2)
set.seed(456)
ind2 <- sample(1:5,length( data.x),replace=T)
res2 <- c()
for( i in 1:5){
  dt.train2 <- data.all2[ind2!=i,]
  x.test2 <- data.all2[ind2==i,-1]
  y.test2 <- data.all2[ind2==i,1]
  mod2 <- lm(dt.train2[,1]~dt.train2[,2]+I(dt.train2[,2]^2)+I(dt.train2[,2]^3)+dt.train2[,3]+dt.train2[,4])
  x.test.olah2 <- cbind(1,x.test2[,1],x.test2[,1]^2,x.test2[,1]^3,x.test2[,2],x.test2[,3])
  beta2 <- coefficients(mod2)
  prediksi2 <- x.test.olah2%*%beta2
  res2 <- c(res2,mean((y.test2-prediksi2)^2))
}
res2
## [1] 22.32898 21.33868 29.40459 29.79913 25.22047
mean(res2)
## [1] 25.61837

Smoothing Spline

ss1 <- smooth.spline(data.x,y,all.knots = T)
ss2 <- smooth.spline(data.x,y,all.knots = F)
plot(data.x,y,xlim=c(-2,5), ylim=c(-10,70), pch=16,col="orange")
lines(ss1,col="purple", lwd=2)
lines(ss2,col="green", lwd=2)

Fungsi all.knots = T menghasilkan visualisasi yang lebih mempertimbahnkan semua amatan x dimana menyesuaikan pola data dan residualnya, sehingga hasilnya lebih fitted dibandingkan fungsi Fungsi all.knots = F hasilnya lebih mulus.

Contoh Responsi

Data TRICEPS

Data yang digunakan untuk ilustrasi berasal dari studi antropometri terhadap 892 perempuan di bawah 50 tahun di tiga desa Gambia di Afrika Barat. Data terdiri dari 3 Kolom yaitu Age, Intriceps, dan Tricepts. Berikut adalah penjelasannya pada masing-masing kolom:
- Age          : umur responden
- Intriceps  : logaritma dari ketebalan lipatan kulit triceps
- Triceps     : ketebalan lipatan kulit triceps
Lipatan kulit trisep diperlukan untuk menghitung lingkar otot lengan atas. Ketebalannya memberikan informasi tentang cadangan lemak tubuh, sedangkan massa otot yang dihitung memberikan informasi tentang cadangan protein.

Peubah yang digunakan, yaitu:
x : age
y : triceps

data("triceps", package="MultiKink")

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
                 geom_point(alpha=0.55, color="blue") + 
                 theme_bw() 

Berdasarkan pola hubungan yang terlihat pada scatterplot, kita akan mencoba untuk mencari model yang bisa merepresentasikan pola hubungan tersebut dengan baik.

Pertemuan 8

Regresi Linear

mod_linear <- lm(triceps~age,data=triceps)
summary(mod_linear)
## 
## Call:
## lm(formula = triceps ~ age, data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9512  -2.3965  -0.5154   1.5822  25.1233 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.19717    0.21244   29.17   <2e-16 ***
## age          0.21584    0.01014   21.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.007 on 890 degrees of freedom
## Multiple R-squared:  0.3372, Adjusted R-squared:  0.3365 
## F-statistic: 452.8 on 1 and 890 DF,  p-value: < 2.2e-16

Ringkasan Nilai R-squared dan AIC

ringkasan_linear <- summary(mod_linear)
ringkasan_linear$r.squared
## [1] 0.3372092
AIC(mod_linear)
## [1] 5011.515
tabel_data <- rbind(data.frame("Y_Aktual" = triceps$triceps,
                               "Y_Prediksi" = predict(mod_linear),
                               "Residuals" = residuals(mod_linear)))
kableExtra::kable(head(tabel_data) ,caption = 'Ringkasan Nilai R-Squared dan AIC')
Ringkasan Nilai R-Squared dan AIC
Y_Aktual Y_Prediksi Residuals
3.4 8.798074 -5.398074
4.0 8.336171 -4.336171
4.2 8.364231 -4.164231
4.2 8.677202 -4.477202
4.4 8.381498 -3.981498
4.4 8.759222 -4.359222

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
                 geom_point(alpha=0.55, color="black") +
   stat_smooth(method = "lm", 
               formula = y~x,lty = 1,
               col = "purple",se = F)+
  theme_bw()

Regresi Polinomial Derajat 2 (Ordo 2)

mod_polinomial2 <- lm(triceps ~ poly(age,2,raw = T),
                     data=triceps)
summary(mod_polinomial2)
## 
## Call:
## lm(formula = triceps ~ poly(age, 2, raw = T), data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.5677  -2.4401  -0.4587   1.6368  24.9961 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             6.0229191  0.3063806  19.658  < 2e-16 ***
## poly(age, 2, raw = T)1  0.2434733  0.0364403   6.681 4.17e-11 ***
## poly(age, 2, raw = T)2 -0.0006257  0.0007926  -0.789     0.43    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.008 on 889 degrees of freedom
## Multiple R-squared:  0.3377, Adjusted R-squared:  0.3362 
## F-statistic: 226.6 on 2 and 889 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) + 
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~poly(x,2,raw=T), 
               lty = 1, col = "purple",se = F)+
  theme_bw()

Scatter Plot dengan Standard Error

ggplot(triceps,aes(x=age, y=triceps)) + 
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~poly(x,2,raw=T), 
               lty = 1, col = "purple",se = T)+
  theme_bw()

Regresi Polinomial Derajat 3 (Ordo 3)

mod_polinomial3 = lm(triceps ~ poly(age,3,raw = T),data=triceps)
summary(mod_polinomial3)
## 
## Call:
## lm(formula = triceps ~ poly(age, 3, raw = T), data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.5832  -1.9284  -0.5415   1.3283  24.4440 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             8.004e+00  3.831e-01  20.893  < 2e-16 ***
## poly(age, 3, raw = T)1 -3.157e-01  7.721e-02  -4.089 4.73e-05 ***
## poly(age, 3, raw = T)2  3.101e-02  3.964e-03   7.824 1.45e-14 ***
## poly(age, 3, raw = T)3 -4.566e-04  5.612e-05  -8.135 1.38e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.868 on 888 degrees of freedom
## Multiple R-squared:  0.3836, Adjusted R-squared:  0.3815 
## F-statistic: 184.2 on 3 and 888 DF,  p-value: < 2.2e-16

Pada model ini, peubah semua \(X^2\) signifikan dan nilai R-squared naik dari model sebelumnya.

Scatter Plot dengan Standard Error

ggplot(triceps,aes(x=age, y=triceps)) + 
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~poly(x,3,raw=T), 
               lty = 1, col = "purple",se = T)+
  theme_bw()

Perbandingan Model Liniear & Polinomial

AIC(mod_linear)
## [1] 5011.515
AIC(mod_polinomial2)
## [1] 5012.89
AIC(mod_polinomial3)
## [1] 4950.774
MSE(predict(mod_linear),triceps$triceps)
## [1] 16.01758
MSE(predict(mod_polinomial2),triceps$triceps)
## [1] 16.00636
MSE(predict(mod_polinomial3),triceps$triceps)
## [1] 14.89621

Regresi Fungsi Tangga (5)

mod_tangga5 <- lm(triceps ~ cut(age,5),data=triceps)
summary(mod_tangga5)
## 
## Call:
## lm(formula = triceps ~ cut(age, 5), data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.5474  -2.0318  -0.4465   1.3682  23.3759 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              7.2318     0.1994  36.260  < 2e-16 ***
## cut(age, 5)(10.6,20.9]   1.6294     0.3244   5.023 6.16e-07 ***
## cut(age, 5)(20.9,31.2]   5.9923     0.4222  14.192  < 2e-16 ***
## cut(age, 5)(31.2,41.5]   7.5156     0.4506  16.678  < 2e-16 ***
## cut(age, 5)(41.5,51.8]   7.4561     0.5543  13.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.939 on 887 degrees of freedom
## Multiple R-squared:  0.3617, Adjusted R-squared:  0.3588 
## F-statistic: 125.7 on 4 and 887 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
       geom_point(alpha=0.55, color="black") +
       stat_smooth(method = "lm", 
               formula = y~cut(x,5), 
               lty = 1, col = "purple",se = F)+
  theme_bw()

Regresi Fungsi Tangga (7)

mod_tangga7 <- lm(triceps ~ cut(age,7),data=triceps)
summary(mod_tangga7)
## 
## Call:
## lm(formula = triceps ~ cut(age, 7), data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8063  -1.7592  -0.4366   1.2894  23.1461 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              7.5592     0.2219  34.060  < 2e-16 ***
## cut(age, 7)(7.62,15]    -0.6486     0.3326  -1.950   0.0515 .  
## cut(age, 7)(15,22.3]     3.4534     0.4239   8.146 1.27e-15 ***
## cut(age, 7)(22.3,29.7]   5.8947     0.4604  12.804  < 2e-16 ***
## cut(age, 7)(29.7,37]     7.8471     0.5249  14.949  < 2e-16 ***
## cut(age, 7)(37,44.4]     6.9191     0.5391  12.835  < 2e-16 ***
## cut(age, 7)(44.4,51.8]   6.3013     0.6560   9.606  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.805 on 885 degrees of freedom
## Multiple R-squared:  0.4055, Adjusted R-squared:  0.4014 
## F-statistic: 100.6 on 6 and 885 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
       geom_point(alpha=0.55, color="black") +
       stat_smooth(method = "lm", 
               formula = y~cut(x,7), 
               lty = 1, col = "purple",se = F)+
  theme_bw()

Akurasi Model

MSE <- function(pred,actual){
  mean((pred-actual)^2)
}
Nilai_MSE <- rbind(MSE(predict(mod_linear),triceps$triceps),
              MSE(predict(mod_polinomial2),triceps$triceps),
              MSE(predict(mod_polinomial3),triceps$triceps),
              MSE(predict(mod_tangga5),triceps$triceps),
              MSE(predict(mod_tangga7),triceps$triceps))
Nama_Model <- c("Linear","Poly (ordo=2)", "Poly (ordo=3)",
                "Tangga (breaks=5)", "Tangga (breaks=7)")
ringkasan <- data.frame(Nama_Model, Nilai_MSE)
kableExtra::kable(head(ringkasan) ,caption = 'Perbandingan Akurasi Ketiga Model')
Perbandingan Akurasi Ketiga Model
Nama_Model Nilai_MSE
Linear 16.01758
Poly (ordo=2) 16.00636
Poly (ordo=3) 14.89621
Tangga (breaks=5) 15.42602
Tangga (breaks=7) 14.36779

Berdasarkan nilai MSE terkecil, model regresi fungsi tangga dengan breaks sebesar 7 merupakan model terbaik dibandingkan model lainnya.

Evaluasi Model dengan Cross Validation

Regresi Linear

set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps") #Biasanya menggunakan v = 5 atau 10 (batas wajar)
metric_linear <- map_dfr(cross_val$splits,
    function(x){
    mod <- lm(triceps ~ age,data=triceps[x$in_id,])
    pred <- predict(mod,newdata=triceps[-x$in_id,])
    truth <- triceps[-x$in_id,]$triceps
    rmse <- mlr3measures::rmse(truth = truth,response = pred)
    mae <- mlr3measures::mae(truth = truth,response = pred)
    metric <- c(rmse,mae)
    names(metric) <- c("rmse","mae") #mae diabsolutkan
    return(metric)
  }
)
metric_linear
## # A tibble: 10 x 2
##     rmse   mae
##    <dbl> <dbl>
##  1  3.65  2.82
##  2  4.62  3.22
##  3  4.38  3.00
##  4  3.85  2.80
##  5  3.08  2.36
##  6  3.83  2.81
##  7  3.59  2.78
##  8  4.66  3.06
##  9  3.50  2.56
## 10  4.59  2.93
#Menghitung rata-rata untuk 10 folds
mean_metric_linear <- colMeans(metric_linear)
mean_metric_linear
##     rmse      mae 
## 3.973249 2.833886

Regresi Polinomial Derajat 2

set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
metric_poly2 <- map_dfr(cross_val$splits,
function(x){
  mod <- lm(triceps ~ poly(age,2,raw = T),data=triceps[x$in_id,])
  pred <- predict(mod,newdata=triceps[-x$in_id,])
  truth <- triceps[-x$in_id,]$triceps
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
metric_poly2
## # A tibble: 10 x 2
##     rmse   mae
##    <dbl> <dbl>
##  1  3.64  2.82
##  2  4.62  3.26
##  3  4.39  3.02
##  4  3.85  2.81
##  5  3.10  2.38
##  6  3.82  2.81
##  7  3.62  2.83
##  8  4.65  3.07
##  9  3.50  2.57
## 10  4.59  2.94
#Menghitung rata-rata untuk 10 folds
mean_metric_poly2 <- colMeans(metric_poly2)
mean_metric_poly2
##     rmse      mae 
## 3.977777 2.851787

Regresi Polinomial Derajat 3

set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")

metric_poly3 <- map_dfr(cross_val$splits,
function(x){
  mod <- lm(triceps ~ poly(age,3,raw = T),data=triceps[x$in_id,])
  pred <- predict(mod,newdata=triceps[-x$in_id,])
  truth <- triceps[-x$in_id,]$triceps
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
metric_poly3
## # A tibble: 10 x 2
##     rmse   mae
##    <dbl> <dbl>
##  1  3.49  2.60
##  2  4.48  2.99
##  3  4.21  2.85
##  4  4.02  2.75
##  5  3.03  2.09
##  6  3.63  2.59
##  7  3.53  2.52
##  8  4.54  2.91
##  9  3.27  2.33
## 10  4.27  2.68
#Menghitung rata-rata untuk 10 folds
mean_metric_poly3 <- colMeans(metric_poly3)
mean_metric_poly3
##     rmse      mae 
## 3.845976 2.632125

Regresi Fungsi Tangga

set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")
breaks <- 3:10 #titik cut data
best_tangga <- map_dfr(breaks, function(i){
    metric_tangga <- map_dfr(cross_val$splits,
    function(x){
        training <- triceps[x$in_id,]
        training$age <- cut(training$age,i)
        mod <- lm(triceps ~ age,data=training)
        labs_x <- levels(mod$model[,2])
        labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
        testing <- triceps[-x$in_id,]
        age_new <- cut(testing$age,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
        pred <- predict(mod,newdata=list(age=age_new))
        truth <- testing$triceps
        data_eval <- na.omit(data.frame(truth,pred))
        rmse <- mlr3measures::rmse(truth = data_eval$truth,response = data_eval$pred)
        mae <- mlr3measures::mae(truth = data_eval$truth,response = data_eval$pred)
        metric <- c(rmse,mae)
        names(metric) <- c("rmse","mae")
        return(metric)
      }
    )
  metric_tangga
  #Menghitung rata-rata untuk 10 folds
  mean_metric_tangga <- colMeans(metric_tangga)
  mean_metric_tangga
  }
)

best_tangga <- cbind(breaks=breaks,best_tangga)
#Menampilkan hasil all breaks
best_tangga
##   breaks     rmse      mae
## 1      3 3.835357 2.618775
## 2      4 3.882932 2.651911
## 3      5 3.917840 2.724368
## 4      6 3.836068 2.622939
## 5      7 3.789715 2.555062
## 6      8 3.812789 2.555563
## 7      9 3.781720 2.518706
## 8     10 3.795877 2.529479
#Berdasarkan rmse terkecil
best_tangga %>% slice_min(rmse)
##   breaks    rmse      mae
## 1      9 3.78172 2.518706
#Berdasarkan mae terkecil
best_tangga %>% slice_min(mae)
##   breaks    rmse      mae
## 1      9 3.78172 2.518706

Perbandingan Model

nilai_metric <- rbind(mean_metric_linear,
                      mean_metric_poly2,
                      mean_metric_poly3,
                      best_tangga %>% dplyr::select(-1) %>% slice_min(mae))

nama_model <- c("Linear","Poly2","Poly3","Tangga (breaks=9)")
data.frame(nama_model,nilai_metric)
##          nama_model     rmse      mae
## 1            Linear 3.973249 2.833886
## 2             Poly2 3.977777 2.851787
## 3             Poly3 3.845976 2.632125
## 4 Tangga (breaks=9) 3.781720 2.518706

Berdasarkan nilai rmse dan mae terkecil, model regresi fungsi tangga dengan breaks sebesar 9 merupakan model terbaik dibandingkan model lainnya.

Pertemuan 9

Regresi Spline

#Menentukan banyaknya fungsi basis dan knots
dim(bs(triceps$age, knots = c(10, 20,40)))
## [1] 892   6
#atau
dim(bs(triceps$age, df=6))
## [1] 892   6
#Nilai knots yang ditentukan oleh komputer
attr(bs(triceps$age, df=6),"knots")
##     25%     50%     75% 
##  5.5475 12.2100 24.7275

b-spline

Knots Manual

knot_value_manual_3 = c(10, 20,40)
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_manual_3 ),data=triceps)
summary(mod_spline_1)
## 
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_manual_3), 
##     data = triceps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.385  -1.682  -0.393   1.165  22.855 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            8.22533    0.61873  13.294  < 2e-16 ***
## bs(age, knots = knot_value_manual_3)1 -0.06551    1.14972  -0.057    0.955    
## bs(age, knots = knot_value_manual_3)2 -4.30051    0.72301  -5.948 3.90e-09 ***
## bs(age, knots = knot_value_manual_3)3  7.80435    1.17793   6.625 6.00e-11 ***
## bs(age, knots = knot_value_manual_3)4  6.14890    1.27439   4.825 1.65e-06 ***
## bs(age, knots = knot_value_manual_3)5  5.56640    1.42225   3.914 9.78e-05 ***
## bs(age, knots = knot_value_manual_3)6  7.90178    1.54514   5.114 3.87e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.76 on 885 degrees of freedom
## Multiple R-squared:  0.4195, Adjusted R-squared:  0.4155 
## F-statistic: 106.6 on 6 and 885 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
                 geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~bs(x, knots = knot_value_manual_3), 
               lty = 1,se = F)

Knots Fungsi

knot_value_pc_df_6 = attr(bs(triceps$age, df=6),"knots")
mod_spline_1 = lm(triceps ~ bs(age, knots =knot_value_pc_df_6 ),data=triceps)
summary(mod_spline_1)
## 
## Call:
## lm(formula = triceps ~ bs(age, knots = knot_value_pc_df_6), data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0056  -1.7556  -0.2944   1.2008  23.0695 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            6.5925     0.8770   7.517 1.37e-13 ***
## bs(age, knots = knot_value_pc_df_6)1   3.7961     1.5015   2.528  0.01164 *  
## bs(age, knots = knot_value_pc_df_6)2  -2.0749     0.8884  -2.335  0.01974 *  
## bs(age, knots = knot_value_pc_df_6)3   1.5139     1.1645   1.300  0.19391    
## bs(age, knots = knot_value_pc_df_6)4  11.6394     1.3144   8.855  < 2e-16 ***
## bs(age, knots = knot_value_pc_df_6)5   5.9680     1.5602   3.825  0.00014 ***
## bs(age, knots = knot_value_pc_df_6)6   8.9127     1.4053   6.342 3.60e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.757 on 885 degrees of freedom
## Multiple R-squared:  0.4206, Adjusted R-squared:  0.4167 
## F-statistic: 107.1 on 6 and 885 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
                 geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~bs(x, knots = knot_value_pc_df_6), 
               lty = 1,se = F)

Natural Spline

knot_value_manual_3 = c(10, 20,40)
mod_spline3ns = lm(triceps ~ ns(age, knots = knot_value_manual_3),data=triceps)
summary(mod_spline3ns)
## 
## Call:
## lm(formula = triceps ~ ns(age, knots = knot_value_manual_3), 
##     data = triceps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7220  -1.7640  -0.3985   1.1908  22.9684 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             8.8748     0.3742  23.714  < 2e-16 ***
## ns(age, knots = knot_value_manual_3)1   7.0119     0.6728  10.422  < 2e-16 ***
## ns(age, knots = knot_value_manual_3)2   6.0762     0.8625   7.045 3.72e-12 ***
## ns(age, knots = knot_value_manual_3)3   2.0780     1.0632   1.954    0.051 .  
## ns(age, knots = knot_value_manual_3)4   8.8616     0.9930   8.924  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.762 on 887 degrees of freedom
## Multiple R-squared:  0.4176, Adjusted R-squared:  0.415 
## F-statistic:   159 on 4 and 887 DF,  p-value: < 2.2e-16

Scatter Plot

ggplot(triceps,aes(x=age, y=triceps)) +
                 geom_point(alpha=0.55, color="black")+
      stat_smooth(method = "lm", 
                 formula = y~ns(x, knots = knot_value_manual_3), 
                 lty = 1,se=F)

Smoothing Spline

Fungsi smooth.spline() dapat digunakan untuk melakukan smoothing spline pada R

model_sms <- with(data = triceps,smooth.spline(age,triceps))
model_sms 
## Call:
## smooth.spline(x = age, y = triceps)
## 
## Smoothing Parameter  spar= 0.5162704  lambda= 1.232259e-06 (13 iterations)
## Equivalent Degrees of Freedom (Df): 50.00639
## Penalized Criterion (RSS): 8591.581
## GCV: 13.77835

Fungsi smooth.spline secara otomatis memilih paramter lambda terbaik dengan menggunakan Cross-Validation (CV) dan Generalized Cross-Validation (GCV). Perbedaan utama antara keduanya adalah pada ukuran kebaikan model yang digunakan. Kalau CV menggunakan MSE sebagai ukuran kebaikan model, sedangkan GCV menggunakan Weighted-MSE. Banyaknya folds (lipatan) pada CV dan GCV ini adalah sejumlah observasinya.

Scatter Plot

pred_data <- broom::augment(model_sms)

ggplot(pred_data,aes(x=x,y=y))+
  geom_point(alpha=0.55, color="black")+
  geom_line(aes(y=.fitted),col="blue",
            lty=1)+
  xlab("age")+
  ylab("triceps")+
  theme_bw()

Pengaruh parameter lambda terhadap tingkat smoothness kurva bisa dilihat dari ilustrasi berikut:

model_sms_lambda <- data.frame(lambda=seq(0,5,by=0.5)) %>% 
  group_by(lambda) %>% 
  do(broom::augment(with(data = triceps,smooth.spline(age,triceps,lambda = .$lambda))))

p <- ggplot(model_sms_lambda,
       aes(x=x,y=y))+
  geom_line(aes(y=.fitted),
            col="blue",
            lty=1
            )+
  facet_wrap(~lambda)
p

Jika kita tentukan df=7, maka hasil kurva model smooth.spline akan lebih merepresentasikan data.

model_sms <- with(data = triceps,smooth.spline(age,triceps,df=7))
model_sms
## Call:
## smooth.spline(x = age, y = triceps, df = 7)
## 
## Smoothing Parameter  spar= 1.049874  lambda= 0.008827582 (11 iterations)
## Equivalent Degrees of Freedom (Df): 7.000747
## Penalized Criterion (RSS): 10112.16
## GCV: 14.20355

Scatter Plot

pred_data <- broom::augment(model_sms)

ggplot(pred_data,aes(x=x,y=y))+
  geom_point(alpha=0.55, color="black")+
  geom_line(aes(y=.fitted),col="blue",
            lty=1)+
  xlab("age")+
  ylab("triceps")+
  theme_bw()

LOESS

Masih menggunakan data triceps seperti pada ilustrasi sebelumnya, kali ini kita akan mencoba melakukan pendekatan local regression dengan fungsi loess().

model_loess <- loess(triceps~age,
                     data = triceps)
summary(model_loess)
## Call:
## loess(formula = triceps ~ age, data = triceps)
## 
## Number of Observations: 892 
## Equivalent Number of Parameters: 4.6 
## Residual Standard Error: 3.777 
## Trace of smoother matrix: 5.01  (exact)
## 
## Control settings:
##   span     :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
##   normalize:  TRUE
##  parametric:  FALSE
## drop.square:  FALSE

Pengaruh parameter span terhadap tingkat smoothness kurva bisa dilihat dari ilustrasi berikut:

model_loess_span <- data.frame(span=seq(0.1,5,by=0.5)) %>% 
  group_by(span) %>% 
  do(broom::augment(loess(triceps~age,
                     data = triceps,span=.$span)))

p2 <- ggplot(model_loess_span,
       aes(x=age,y=triceps))+
  geom_line(aes(y=.fitted),
            col="blue",
            lty=1
            )+
  facet_wrap(~span)
p2

Pendekatan ini juga dapat dilakukan dengan fungsi stat_smooth() pada package ggplot2.

ggplot(triceps, aes(age,triceps)) +
  geom_point(alpha=0.5,color="black") +
  stat_smooth(method='loess',
               formula=y~x,
              span = 0.75,
              col="blue",
              lty=1,
              se=F)

Tuning span dapat dilakukan dengan menggunakan cross-validation:

set.seed(123)
cross_val <- vfold_cv(triceps,v=10,strata = "triceps")

span <- seq(0.1,1,length.out=50)

best_loess <- map_dfr(span, function(i){
metric_loess <- map_dfr(cross_val$splits,
    function(x){
mod <- loess(triceps ~ age,span = i,
         data=triceps[x$in_id,])
pred <- predict(mod,
               newdata=triceps[-x$in_id,])
truth <- triceps[-x$in_id,]$triceps

data_eval <- na.omit(data.frame(pred=pred,
                                truth=truth))


rmse <- mlr3measures::rmse(truth = data_eval$truth,
                           response = data_eval$pred
                           )
mae <- mlr3measures::mae(truth = data_eval$truth,
                           response = data_eval$pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

head(metric_loess,20)

#Menghitung rata-rata untuk 10 folds
mean_metric_loess <- colMeans(metric_loess)

mean_metric_loess
}
)

best_loess <- cbind(span=span,best_loess)
#Menampilkan hasil all breaks
best_loess
##         span     rmse      mae
## 1  0.1000000 3.773822 2.495752
## 2  0.1183673 3.771342 2.493506
## 3  0.1367347 3.766450 2.487557
## 4  0.1551020 3.757385 2.480571
## 5  0.1734694 3.749539 2.473494
## 6  0.1918367 3.746288 2.470915
## 7  0.2102041 3.742741 2.467990
## 8  0.2285714 3.740397 2.465388
## 9  0.2469388 3.737724 2.463311
## 10 0.2653061 3.737105 2.462252
## 11 0.2836735 3.735645 2.460514
## 12 0.3020408 3.734522 2.459225
## 13 0.3204082 3.733298 2.457694
## 14 0.3387755 3.732538 2.456280
## 15 0.3571429 3.732082 2.455452
## 16 0.3755102 3.731261 2.454642
## 17 0.3938776 3.730882 2.454293
## 18 0.4122449 3.730585 2.454291
## 19 0.4306122 3.730351 2.454869
## 20 0.4489796 3.730717 2.456222
## 21 0.4673469 3.730831 2.456768
## 22 0.4857143 3.731387 2.458307
## 23 0.5040816 3.732091 2.460189
## 24 0.5224490 3.732392 2.461811
## 25 0.5408163 3.733609 2.464543
## 26 0.5591837 3.734393 2.467180
## 27 0.5775510 3.735554 2.470517
## 28 0.5959184 3.736679 2.473012
## 29 0.6142857 3.738227 2.476519
## 30 0.6326531 3.741401 2.476887
## 31 0.6510204 3.742660 2.479842
## 32 0.6693878 3.744336 2.483581
## 33 0.6877551 3.746651 2.488792
## 34 0.7061224 3.748708 2.492180
## 35 0.7244898 3.750734 2.495218
## 36 0.7428571 3.752815 2.498359
## 37 0.7612245 3.753998 2.504175
## 38 0.7795918 3.755981 2.511421
## 39 0.7979592 3.756794 2.514375
## 40 0.8163265 3.759456 2.522994
## 41 0.8346939 3.767892 2.543250
## 42 0.8530612 3.777168 2.557863
## 43 0.8714286 3.782407 2.566972
## 44 0.8897959 3.788428 2.578042
## 45 0.9081633 3.797002 2.595698
## 46 0.9265306 3.803259 2.609852
## 47 0.9448980 3.809478 2.623386
## 48 0.9632653 3.824192 2.655691
## 49 0.9816327 3.835596 2.679155
## 50 1.0000000 3.860514 2.718911
#Berdasarkan nilai rmse terkecil
best_loess %>% slice_min(rmse)
##        span     rmse      mae
## 1 0.4306122 3.730351 2.454869
#Berdasarkan nilai mae terkecil
best_loess %>% slice_min(mae)
##        span     rmse      mae
## 1 0.4122449 3.730585 2.454291

Scatter Plot

ggplot(triceps, aes(age,triceps)) +
  geom_point(alpha=0.5,color="black") +
  stat_smooth(method='loess',
              formula=y~x,
              span = 0.4306122,
              col="blue",
              lty=1,
              se=F)

Latihan: Data Auto

Lakukan analisis regresi nonlinier pada data berikut:

Pra-proccessing

autodt <- Auto %>%  dplyr::select(mpg, horsepower, origin)
str(autodt)
## 'data.frame':    392 obs. of  3 variables:
##  $ mpg       : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ horsepower: num  130 165 150 150 140 198 220 215 225 190 ...
##  $ origin    : num  1 1 1 1 1 1 1 1 1 1 ...
#Statistik dekriptif
summary(autodt)
##       mpg          horsepower        origin     
##  Min.   : 9.00   Min.   : 46.0   Min.   :1.000  
##  1st Qu.:17.00   1st Qu.: 75.0   1st Qu.:1.000  
##  Median :22.75   Median : 93.5   Median :1.000  
##  Mean   :23.45   Mean   :104.5   Mean   :1.577  
##  3rd Qu.:29.00   3rd Qu.:126.0   3rd Qu.:2.000  
##  Max.   :46.60   Max.   :230.0   Max.   :3.000
kableExtra::kable(head(autodt) ,caption = 'Tabulasi Data Auto')
Tabulasi Data Auto
mpg horsepower origin
18 130 1
15 165 1
18 150 1
16 150 1
17 140 1
15 198 1

Eksploratif

pairs(autodt, pch=19, lower.panel=NULL)

mpg <- autodt$mpg
horsepower <- autodt$horsepower
origin <- autodt$origin
clist <- c("lightblue", "orange", "purple")
plot(mpg, horsepower, pch=19, col=clist[origin])

Pemodelan

Model origin ~ mpg

Regresi Linear V1

lin.mod.1 <- lm(origin ~ mpg, data=autodt)
plot(mpg, origin, pch=19)
lines(mpg, lin.mod.1$fitted.values, col="purple")

summary(lin.mod.1)
## 
## Call:
## lm(formula = origin ~ mpg, data = autodt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.48384 -0.40469 -0.08386  0.34740  1.74114 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.208871   0.106520   1.961   0.0506 .  
## mpg         0.058333   0.004311  13.531   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6654 on 390 degrees of freedom
## Multiple R-squared:  0.3195, Adjusted R-squared:  0.3177 
## F-statistic: 183.1 on 1 and 390 DF,  p-value: < 2.2e-16

Regresi Linear V2

lin.mod.2 <- lm(horsepower ~ mpg, data=autodt)
plot(mpg, horsepower, pch=19)
lines(mpg, lin.mod.2$fitted.values, col="purple")

summary(lin.mod.2)
## 
## Call:
## lm(formula = horsepower ~ mpg, data = autodt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.892 -15.716  -2.094  13.108  96.947 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 194.4756     3.8732   50.21   <2e-16 ***
## mpg          -3.8389     0.1568  -24.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.19 on 390 degrees of freedom
## Multiple R-squared:  0.6059, Adjusted R-squared:  0.6049 
## F-statistic: 599.7 on 1 and 390 DF,  p-value: < 2.2e-16

Regresi Polinomial

#Penentuan derajat polinomial terbaik dibatasi hingga derajat ke-17
df.list.pol <- data.frame("df" = c(rep(NA, 10)), "AIC1" = c(rep(NA, 10)), "MSE1" = c(rep(NA, 10)), "AIC2" = c(rep(NA, 10)), "MSE2" = c(rep(NA, 10)))
for(j in 2:3){
  for(i in 2:17){
    pol.mod <- lm(autodt[,j] ~ poly(mpg,i,raw = T), data=autodt)
    AIC.pol <- AIC(pol.mod)
    MSE.pol <- mean((predict(pol.mod)-autodt[,j])^2)
    if(j == 3){
      df.list.pol[i-1,1:3] <- c(i, AIC.pol, MSE.pol)}else{
        df.list.pol[i-1, 4:5] <- c(AIC.pol, MSE.pol)
      }
  }
}
knitr::kable(df.list.pol)
df AIC1 MSE1 AIC2 MSE2
2 797.6568 0.4389163 3485.217 416.7871
3 796.2795 0.4351511 3454.949 383.8523
4 797.3754 0.4341486 3456.813 383.7195
5 799.2506 0.4340104 3455.310 380.3060
6 800.0427 0.4326751 3455.135 378.2017
7 801.2607 0.4318129 3455.139 376.2801
8 802.3611 0.4308230 3454.849 374.0890
9 804.3474 0.4308080 3455.295 372.6086
10 803.5020 0.4276922 3456.768 372.1082
11 805.3653 0.4275430 3455.520 369.0375
12 805.3653 0.4275430 3455.520 369.0375
13 800.9718 0.4206264 3455.558 367.1956
14 800.9718 0.4206264 3455.558 367.1956
15 798.6931 0.4160602 3457.551 367.1889
16 798.6931 0.4160602 3457.551 367.1889
17 799.4754 0.4147698 3458.900 366.5791

Berdasarkan hasil looping yang tersedia pada tabel di atas, diketahui bahwa derajat bebas polinomial yang terpilih untuk korelasi model origin ~ mpg adalah tiga dan untuk korelasi model horsepower ~ mpg adalah depalan berdasarkan nilai AIC terkecil.

Scatter Plot Model origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,3, raw=T), col="purple", se=F)

Scatter Plot Model horsepower ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,8, raw=T), col="purple", se=F)

Regresi Fungsi Tangga

#Penentuan derajat fungsi tangga terbaik dibatasi hingga derajat ke-17
df.list.step <- data.frame("df" = c(rep(NA, 10)), "AIC1" = c(rep(NA, 10)), "MSE1" = c(rep(NA, 10)), "AIC2" = c(rep(NA, 10)), "MSE2" = c(rep(NA, 10)))
for(j in 2:3){
  for(i in 3:17){
    step.mod <- lm(autodt[,j] ~ cut(mpg,i), data=autodt)
    AIC.step <- AIC(step.mod)
    MSE.step <- mean((predict(step.mod)-autodt[,j])^2)
    if(j == 3){
      df.list.step[i-2,1:3] <- c(i, AIC.step, MSE.step)}else{
        df.list.step[i-2, 4:5] <- c(AIC.step, MSE.step)
      }
  }
}
knitr::kable(df.list.step)
df AIC1 MSE1 AIC2 MSE2
3 825.4751 0.4711959 3722.514 763.5092
4 816.3085 0.4579626 3605.549 563.6549
5 812.0036 0.4506557 3526.845 458.7770
6 812.9059 0.4493955 3554.364 489.6366
7 800.0531 0.4326866 3521.117 447.5318
8 809.4147 0.4408891 3551.701 481.3840
9 807.4994 0.4365074 3494.485 413.8924
10 805.2416 0.4317918 3478.747 395.5813
11 802.1242 0.4261916 3499.380 414.8383
12 792.2372 0.4134617 3496.674 409.8881
13 802.4971 0.4222663 3468.818 379.8288
14 800.3072 0.4177769 3463.291 372.6051
15 796.4998 0.4116333 3468.751 375.9084
16 803.3598 0.4167684 3490.100 394.9285
17 796.7535 0.4077180 3470.907 374.1436

Berdasarkan hasil looping yang tersedia pada tabel di atas, diketahui bahwa derajat bebas (breaks) fungsi tangga yang terpilih untuk korelasi model origin ~ mpg adalah dua belas dan untuk korelasi model horsepower ~ mpg adalah sepuluh berdasarkan nilai AIC terkecil.

Scatter Plot Model origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~cut(x,12), col="purple", se=F)

Scatter Plot Model horsepower ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~cut(x,10), col="purple", se=F)

Perbandingan Akurasi Model V1

pol.mod.1 <- lm(origin ~ poly(mpg,3,raw = T), data=autodt)
pol.mod.2 <- lm(horsepower ~ poly(mpg,8,raw = T), data=autodt)
step.mod.1 <- lm(origin ~ cut(mpg,12), data=autodt)
step.mod.2 <- lm(horsepower ~ cut(mpg,10), data=autodt)
model.auto1 <- data.frame("Model" = c("Linear (mpg~origin)", "Linear (mpg~horsepower)", "Polinomial(3) (mpg~origin)", "Polinomial(8) (mpg~horsepower)", "Fungsi Tangga(12) (mpg~origin)", "Fungsi Tangga(10) (mpg~horsepower)"), "AIC" = c(AIC(lin.mod.1), AIC(lin.mod.2), AIC(pol.mod.1), AIC(pol.mod.2), AIC(step.mod.1), AIC(step.mod.2)), "MSE" = c(MSE(predict(lin.mod.1),autodt$origin), MSE(predict(lin.mod.2),autodt$horsepower), MSE(predict(pol.mod.1),autodt$origin), MSE(predict(pol.mod.2),autodt$horsepower), MSE(predict(step.mod.1),autodt$origin), MSE(predict(step.mod.2),autodt$horsepower)))
kableExtra::kable(head(model.auto1) ,caption = 'Perbandingan Akurasi Model V1')
Perbandingan Akurasi Model V1
Model AIC MSE
Linear (mpg~origin) 797.0222 0.4404478
Linear (mpg~horsepower) 3614.3235 582.3256764
Polinomial(3) (mpg~origin) 796.2795 0.4351511
Polinomial(8) (mpg~horsepower) 3454.8494 374.0890447
Fungsi Tangga(12) (mpg~origin) 792.2372 0.4134617
Fungsi Tangga(10) (mpg~horsepower) 3478.7475 395.5813018

Berdasarkah perbandingan akurasi model, diketahui bahwa untuk korelasi model origin ~ mpg lebih baik menggunakan model regresi fungsi tangga dengan breaks 12. Sementara itu, untuk korelasi model horsepower ~ mpg lebih baik menggunakan model polinomial dengan derajat bebas 3. Kedua model terbaik ini masih bersifat sementara karena masih dapat dianalisis dengan metode pemulusan.

Regresi Spline

Penentuan banyaknya knots dengan menggunakan bantuan komputer.

knots.val.auto <- attr(bs(autodt$mpg, df=6), "knots")
knots.val.auto
##   25%   50%   75% 
## 17.00 22.75 29.00

Regresi B-Spline

Regresi B-Spline origin ~ mpg

spline.mod.auto1 <- lm(origin ~ bs(mpg, knots=knots.val.auto), data=autodt)
summary(spline.mod.auto1)
## 
## Call:
## lm(formula = origin ~ bs(mpg, knots = knots.val.auto), data = autodt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39803 -0.38041 -0.02476  0.33868  1.81900 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       1.04633    0.44800   2.336   0.0200 *
## bs(mpg, knots = knots.val.auto)1 -0.09890    0.64164  -0.154   0.8776  
## bs(mpg, knots = knots.val.auto)2 -0.07956    0.42992  -0.185   0.8533  
## bs(mpg, knots = knots.val.auto)3  0.57546    0.49774   1.156   0.2483  
## bs(mpg, knots = knots.val.auto)4  1.14229    0.49623   2.302   0.0219 *
## bs(mpg, knots = knots.val.auto)5  1.53577    0.64019   2.399   0.0169 *
## bs(mpg, knots = knots.val.auto)6  1.30545    0.61946   2.107   0.0357 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6649 on 385 degrees of freedom
## Multiple R-squared:  0.3292, Adjusted R-squared:  0.3187 
## F-statistic: 31.49 on 6 and 385 DF,  p-value: < 2.2e-16

scatter Plot B-Spline origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), lty=1, se=F)

Regresi B-Spline horsepower ~ mpg

spline.mod.auto2 <- lm(horsepower ~ bs(mpg, knots=knots.val.auto), data=autodt)
summary(spline.mod.auto2)
## 
## Call:
## lm(formula = horsepower ~ bs(mpg, knots = knots.val.auto), data = autodt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -74.043 -10.103  -0.818   8.075  95.778 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       195.012     13.181  14.795  < 2e-16 ***
## bs(mpg, knots = knots.val.auto)1    2.792     18.879   0.148    0.882    
## bs(mpg, knots = knots.val.auto)2  -80.488     12.649  -6.363 5.62e-10 ***
## bs(mpg, knots = knots.val.auto)3 -103.774     14.645  -7.086 6.62e-12 ***
## bs(mpg, knots = knots.val.auto)4 -126.115     14.600  -8.638  < 2e-16 ***
## bs(mpg, knots = knots.val.auto)5 -127.143     18.836  -6.750 5.45e-11 ***
## bs(mpg, knots = knots.val.auto)6 -141.413     18.226  -7.759 7.77e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.56 on 385 degrees of freedom
## Multiple R-squared:  0.7457, Adjusted R-squared:  0.7417 
## F-statistic: 188.1 on 6 and 385 DF,  p-value: < 2.2e-16

Scatter Plot B-Spline horsepower ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), lty=1, se=F)

Natural Spline origin ~ mpg

ns.spline.auto1 <- lm(origin ~ ns(mpg, knots=knots.val.auto), data=autodt)
summary(ns.spline.auto1)
## 
## Call:
## lm(formula = origin ~ ns(mpg, knots = knots.val.auto), data = autodt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36121 -0.36897 -0.03639  0.34308  1.82256 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        0.9776     0.2295   4.261 2.56e-05 ***
## ns(mpg, knots = knots.val.auto)1   0.6192     0.2188   2.830  0.00489 ** 
## ns(mpg, knots = knots.val.auto)2   1.2993     0.2091   6.215 1.33e-09 ***
## ns(mpg, knots = knots.val.auto)3   1.4244     0.5348   2.663  0.00806 ** 
## ns(mpg, knots = knots.val.auto)4   1.5339     0.2913   5.265 2.33e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6633 on 387 degrees of freedom
## Multiple R-squared:  0.3288, Adjusted R-squared:  0.3218 
## F-statistic: 47.39 on 4 and 387 DF,  p-value: < 2.2e-16

Scatter Plot Natural Spline origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), lty=1,se=F)

Natural Spline horsepower ~ mpg

ns.spline.auto2 <- lm(horsepower ~ ns(mpg, knots=knots.val.auto), data=autodt)
summary(ns.spline.auto2)
## 
## Call:
## lm(formula = horsepower ~ ns(mpg, knots = knots.val.auto), data = autodt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -72.461 -10.731  -1.433   8.759  95.650 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       218.000      6.786   32.13   <2e-16 ***
## ns(mpg, knots = knots.val.auto)1 -130.511      6.470  -20.17   <2e-16 ***
## ns(mpg, knots = knots.val.auto)2 -109.368      6.183  -17.69   <2e-16 ***
## ns(mpg, knots = knots.val.auto)3 -237.134     15.815  -14.99   <2e-16 ***
## ns(mpg, knots = knots.val.auto)4 -115.116      8.615  -13.36   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.62 on 387 degrees of freedom
## Multiple R-squared:  0.7429, Adjusted R-squared:  0.7403 
## F-statistic: 279.6 on 4 and 387 DF,  p-value: < 2.2e-16

Scatter PLot Natural Spline origin ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="brown") + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), lty=1,se=F)

Smoothing Spline

Smoothing Spline origin ~ mpg

ss.auto1 <- with(data=autodt, smooth.spline(mpg, origin))
ss.auto1
## Call:
## smooth.spline(x = mpg, y = origin)
## 
## Smoothing Parameter  spar= 0.6891065  lambda= 0.0002244654 (11 iterations)
## Equivalent Degrees of Freedom (Df): 12.93154
## Penalized Criterion (RSS): 61.61513
## GCV: 0.4385944

Scatter Plot Smoothing Spline origin ~ mpg

pred.ss1 <- broom::augment(ss.auto1)
ggplot(pred.ss1, aes(x=x, y=y)) + geom_point(color="brown") + geom_line(aes(y=.fitted), col="orange", lty=1) + xlab("mpg") + ylab("origin")

Smoothing Spline horsepower ~ mpg

ss.auto2 <- with(data=autodt, smooth.spline(mpg, horsepower))
ss.auto2
## Call:
## smooth.spline(x = mpg, y = horsepower)
## 
## Smoothing Parameter  spar= 0.8528728  lambda= 0.003422279 (12 iterations)
## Equivalent Degrees of Freedom (Df): 6.989153
## Penalized Criterion (RSS): 35962.53
## GCV: 386.3516

Scatter Plot Smoothing Spline horsepower ~ mpg

pred.ss2 <- broom::augment(ss.auto2)
ggplot(pred.ss2, aes(x=x, y=y)) + geom_point(color="brown") + geom_line(aes(y=.fitted), col="orange", lty=1) + xlab("mpg") + ylab("horsepower")

LOESS Functin

LOESS Functin origin ~ mpg

loess.auto1 <- loess(origin~mpg, data=autodt)
summary(loess.auto1)
## Call:
## loess(formula = origin ~ mpg, data = autodt)
## 
## Number of Observations: 392 
## Equivalent Number of Parameters: 4.8 
## Residual Standard Error: 0.6627 
## Trace of smoother matrix: 5.24  (exact)
## 
## Control settings:
##   span     :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
##   normalize:  TRUE
##  parametric:  FALSE
## drop.square:  FALSE

Scatter Plot with Standard Erorr LOESS Functin origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="red") + stat_smooth(method="loess", formula=y~x, col="purple", show.legend=T)

LOESS Functin horsepower ~ mpg

loess.auto2 <- loess(horsepower~mpg, data=autodt)
summary(loess.auto2)
## Call:
## loess(formula = horsepower ~ mpg, data = autodt)
## 
## Number of Observations: 392 
## Equivalent Number of Parameters: 4.8 
## Residual Standard Error: 19.58 
## Trace of smoother matrix: 5.24  (exact)
## 
## Control settings:
##   span     :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
##   normalize:  TRUE
##  parametric:  FALSE
## drop.square:  FALSE

Scatter Plot with Standard Erorr LOESS Functin horsepower ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="red") + stat_smooth(method="loess", formula=y~x, col="purple", show.legend=T)

Eksploratif: Perbandingan Model V2 origin ~ mpg

ggplot(autodt, aes(x=mpg, y=origin)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~poly(x,3,raw=T), col="blue", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~cut(x,12), col="purple", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), col="green", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), col="brown", lwd=1.2, se=F) + stat_smooth(method="loess", formula=y~x, col="orange", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~x, col="red", lwd=1.2, se=F)

Legend plot:
—–: Model regresi linear
—–: Model regresi polinomial
—–: Model regresi fungsi tangga
—–: Model regresi spline
—–: Model regresi natural spline
—–: Model regresi fungsi LOESS

Perbandingan Akurasi Model V2 origin ~ mpg

model.auto2a <- data.frame(Model = c("Linear", "Polinomial(3)", "Fungsi Tangga(12)", "Regresi Spline", "Natural Spline", "Fungsi LOESS"), MSE = c(MSE(predict(lin.mod.1), autodt$origin), MSE(predict(pol.mod.1), autodt$origin), MSE(predict(step.mod.1), autodt$origin), MSE(predict(spline.mod.auto1), autodt$origin), MSE(predict(ns.spline.auto1), autodt$origin), MSE(predict(loess.auto1), autodt$origin)))
kableExtra::kable(head(model.auto2a) ,caption = 'Perbandingan Akurasi Model V2 (origin ~ mpg)')
Perbandingan Akurasi Model V2 (origin ~ mpg)
Model MSE
Linear 0.4404478
Polinomial(3) 0.4351511
Fungsi Tangga(12) 0.4134617
Regresi Spline 0.4341514
Natural Spline 0.4344177
Fungsi LOESS 0.4328007

Berdasarkah perbandingan akurasi model dengan nilai MSE terkecil, diketahui bahwa model regresi fungsi tangga dengan breaks sebesar 12 merupakan model terbaik untuk korelasi model origin ~ mpg.

Eksploratif: Perbandingan Model V2 horsepower ~ mpg

ggplot(autodt, aes(x=mpg, y=horsepower)) + geom_point(color="black") +  stat_smooth(method="lm", formula=y~bs(x, knots=knots.val.auto), col="green", lwd=4, se=F) + stat_smooth(method="loess", formula=y~x, col="orange", lwd=2.5, se=F) + stat_smooth(method="lm", formula=y~cut(x,10), col="purple", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~poly(x,8,raw=T), col="blue", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~ns(x, knots=knots.val.auto), col="brown", lwd=1.2, se=F) + stat_smooth(method="lm", formula=y~x, col="red", lwd=1.2, se=F)

Legend plot:
—–: Model regresi linear
—–: Model regresi polinomial
—–: Model regresi fungsi tangga
—–: Model regresi spline
—–: Model regresi natural spline
—–: Model regresi fungsi LOESS

Perbandingan Akurasi Model V2 horsepower ~ mpg

model.auto2b <- data.frame(Model = c("Linear", "Polinomial(8)", "Fungsi Tangga(10)", "Regresi Spline", "Regresi Natural Spline", "LOESS Function"), MSE = c(MSE(predict(lin.mod.2), autodt$horsepower), MSE(predict(pol.mod.2), autodt$horsepower), MSE(predict(step.mod.2), autodt$horsepower), MSE(predict(spline.mod.auto2), autodt$horsepower), MSE(predict(ns.spline.auto2), autodt$horsepower), MSE(predict(loess.auto2), autodt$horsepower)))
kableExtra::kable(head(model.auto2a) ,caption = 'Perbandingan Akurasi Model V2 (horsepower ~ mpg)')
Perbandingan Akurasi Model V2 (horsepower ~ mpg)
Model MSE
Linear 0.4404478
Polinomial(3) 0.4351511
Fungsi Tangga(12) 0.4134617
Regresi Spline 0.4341514
Natural Spline 0.4344177
Fungsi LOESS 0.4328007

Berdasarkan nilai MSE terkecil pada perbandingan akurasi model di atas, model terbaik yang diperoleh adalah model regresi fungsi tangga dengan breaks 12.

Model with Cross-Validation Evaluation

Pra-proccessing

set.seed(174)
cross.val1 <- vfold_cv(autodt, v=10, strata="origin")
cross.val2 <- vfold_cv(autodt, v=10, strata="horsepower")

Model origin~mpg

Regresi Linear

lin.met1 <- map_dfr(cross.val1$splits, function(x){
    mod <- lm(origin ~ mpg,data=autodt[x$in_id,])
    pred <- predict(mod,newdata=autodt[-x$in_id,])
    truth <- autodt[-x$in_id,]$origin
    rmse <- mlr3measures::rmse(truth = truth,response = pred)
    mae <- mlr3measures::mae(truth = truth,response = pred)
    metric <- c(rmse,mae)
    names(metric) <- c("rmse","mae")
    return(metric)
  }
)
m_lin.met1 <- colMeans(lin.met1)
m_lin.met1
##      rmse       mae 
## 0.6631052 0.5147590

Regresi Polinomial Ordo 3

pol.met1 <- map_dfr(cross.val1$splits,
function(x){
  mod <- lm(origin ~ poly(mpg,3,raw = T),data=autodt[x$in_id,])
  pred <- predict(mod,newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$origin
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
m_pol.met1 <- colMeans(pol.met1)
m_pol.met1
##      rmse       mae 
## 0.6623899 0.5033983

Regresi Fungsi Tangga

breaks <- 3:17
best.step1 <- map_dfr(breaks, function(i){
    step.met1 <- map_dfr(cross.val1$splits,
    function(x){
        training <-autodt[x$in_id,]
        training$mpg <- cut(training$mpg,i)
        mod <- lm(origin ~ mpg, data=training)
        labs_x <- levels(mod$model[,2])
        labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)), upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
        testing <- autodt[-x$in_id,]
        newdt <- cut(testing$mpg, c(labs_x_breaks[1,1], labs_x_breaks[,2]))
        pred <- predict(mod, newdata=list(mpg=newdt))
        truth <- testing$origin
        data_eval <- na.omit(data.frame(truth,pred))
        rmse <- mlr3measures::rmse(truth = data_eval$truth, response = data_eval$pred)
        mae <- mlr3measures::mae(truth = data_eval$truth, response = data_eval$pred)
        metric <- c(rmse,mae)
        names(metric) <- c("rmse","mae")
        return(metric)
      }
    )
  step.met1
  #Menghitung rata-rata untuk 10 folds
  m_step.met1 <- colMeans(step.met1)
  m_step.met1
  }
)

best.step1 <- cbind(breaks=breaks, best.step1)

best.step1 %>% slice_min(rmse, n=5)
##   breaks      rmse       mae
## 1     12 0.6588186 0.4904860
## 2     17 0.6613277 0.4861881
## 3     14 0.6622931 0.4954355
## 4      7 0.6638867 0.5061256
## 5     15 0.6657763 0.4867893
best.step1 %>% slice_min(mae, n=1)
##   breaks      rmse       mae
## 1     17 0.6613277 0.4861881

Best step yang digunakan, yaitu breaks = 17 berdasarkan nilai mae terkecil.

Regresi Spline

spline.met1 <- map_dfr(cross.val1$splits,
function(x){
  mod <- lm(origin ~ bs(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
  pred <- predict(mod,newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$origin
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
m_spline.met1 <- colMeans(spline.met1)
m_spline.met1
##      rmse       mae 
## 0.6687020 0.4995128

Regresi Natural Spline

nspl.met1 <- map_dfr(cross.val1$splits,
function(x){
  mod <- lm(origin ~ ns(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
  pred <- predict(mod,newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$origin
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
m_nspl.met1 <- colMeans(nspl.met1)
m_nspl.met1
##      rmse       mae 
## 0.6641734 0.4962609

Regresi: LOESS

span <- seq(0.1, 1, length.out=91)
best.loess1 <- map_dfr(span, function(i){
  loess.met1 <- map_dfr(cross.val1$splits, function(x){
    mod <- loess(origin ~ mpg, span=i, data=autodt[x$in_id,])
    pred <- predict(mod, newdata=autodt[-x$in_id,])
    truth <- autodt[-x$in_id,]$origin
    data_eval <- na.omit(data.frame(pred=pred, truth=truth))
    rmse <- mlr3measures::rmse(truth=data_eval$truth, response=data_eval$pred)
    mae <- mlr3measures::mae(truth=data_eval$truth, response=data_eval$pred)
    metric <- c(rmse,mae)
    names(metric) <- c("rmse","mae")
    return(metric)
    }
)

  head(loess.met1,20)

  #Menghitung rata-rata untuk 10 folds
  m_loess.met1 <- colMeans(loess.met1)
  m_loess.met1
  }
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.7908e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.0291e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6179e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1236e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.4554e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6336e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1277e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
best.loess1 <- cbind(span=span,best.loess1)
#Menampilkan hasil all breaks
best.loess1 %>% slice_min(rmse, n=1)
##   span      rmse       mae
## 1 0.28 0.6609029 0.4945192
best.loess1 %>% slice_min(mae, n=1)
##   span      rmse       mae
## 1 0.11 0.6677179 0.4876461

Berdasarkan nilai rmse terkecil, best LOESS pada span 0.28, sedangkan best LOESS pada span 0.11 berdasarkan nilai mae terkecil.

Perbandingan Akurasi Model

df.cv1 <- rbind(m_lin.met1, m_pol.met1, best.step1[9,2:3], m_spline.met1, m_nspl.met1, best.loess1[35,-1])
rownames(df.cv1) <- c("Linear", "Polinomial(3)", "Fungsi Tangga(17)", "Spline", "Natural Spline", "Fungsi LOESS(0.11)")
kableExtra::kable(head(df.cv1) ,caption = 'Perbandingan Akurasi Model dengan *Cross Validation Evaluation* (origin ~ mpg)')
Perbandingan Akurasi Model dengan Cross Validation Evaluation (origin ~ mpg)
rmse mae
Linear 0.6631052 0.5147590
Polinomial(3) 0.6623899 0.5033983
Fungsi Tangga(17) 0.6725749 0.5016854
Spline 0.6687020 0.4995128
Natural Spline 0.6641734 0.4962609
Fungsi LOESS(0.11) 0.6622263 0.4951171

Berdasarkan nilai rmse dan mae terkecil, dengan menggunakan cross-validation evaluation 10 folds, model regresi fungsi LOESS dengan span sebesar 0.11 merupakan model terbaik untuk korelasi model origin ~ mpg dibandingkan model lainnya.

Model horsepower~mpg

Regresi Linier

lin.met2 <- map_dfr(cross.val2$splits, function(x){
    mod <- lm(horsepower ~ mpg,data=autodt[x$in_id,])
    pred <- predict(mod,newdata=autodt[-x$in_id,])
    truth <- autodt[-x$in_id,]$horsepower
    rmse <- mlr3measures::rmse(truth = truth,response = pred)
    mae <- mlr3measures::mae(truth = truth,response = pred)
    metric <- c(rmse,mae)
    names(metric) <- c("rmse","mae")
    return(metric)
  }
)
m_lin.met2 <- colMeans(lin.met2)
m_lin.met2
##     rmse      mae 
## 24.07495 18.46422

Regresi Polinomial Ordo 8

pol.met2 <- map_dfr(cross.val2$splits,
function(x){
  mod <- lm(horsepower ~ poly(mpg,8,raw = T),data=autodt[x$in_id,])
  pred <- predict(mod,newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$horsepower
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
m_pol.met2 <- colMeans(pol.met2)
m_pol.met2
##     rmse      mae 
## 20.50232 14.37619

Regresi Fungsi Tangga

breaks <- 3:12
best.step2 <- map_dfr(breaks, function(i){
    step.met2 <- map_dfr(cross.val2$splits,
    function(x){
        training <- autodt[x$in_id,]
        training$mpg <- cut(training$mpg,i)
        mod <- lm(horsepower ~ mpg, data=training)
        labs_x <- levels(mod$model[,2])
        labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)), upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
        testing <- autodt[-x$in_id,]
        newdt <- cut(testing$mpg, c(labs_x_breaks[1,1], labs_x_breaks[,2]))
        pred <- predict(mod, newdata=list(mpg=newdt))
        truth <- testing$horsepower
        data_eval <- na.omit(data.frame(truth,pred))
        rmse <- mlr3measures::rmse(truth = data_eval$truth, response = data_eval$pred)
        mae <- mlr3measures::mae(truth = data_eval$truth, response = data_eval$pred)
        metric <- c(rmse,mae)
        names(metric) <- c("rmse","mae")
        return(metric)
      }
    )
  step.met2
  # menghitung rata-rata untuk 10 folds
  m_step.met2 <- colMeans(step.met2)
  m_step.met2
  }
)

best.step2 <- cbind(breaks=breaks, best.step2)
# menampilkan hasil all breaks
best.step2 %>% slice_min(rmse, n=5)
##   breaks     rmse      mae
## 1     10 19.94188 14.00930
## 2     12 20.20257 14.48694
## 3     11 20.23586 14.67526
## 4      9 20.78474 14.44107
## 5      5 21.23067 15.23984
best.step2 %>% slice_min(mae, n=5)
##   breaks     rmse      mae
## 1     10 19.94188 14.00930
## 2      9 20.78474 14.44107
## 3     12 20.20257 14.48694
## 4     11 20.23586 14.67526
## 5      5 21.23067 15.23984

Regresi Spline

spline.met2 <- map_dfr(cross.val2$splits,
function(x){
  mod <- lm(horsepower ~ bs(mpg, knots=knots.val.auto),data=autodt[x$in_id,])
  pred <- predict(mod, newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$horsepower
  rmse <- mlr3measures::rmse(truth = truth,response=pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(mpg, degree = 3L, knots = c(`25%` = 17, `50%` = 22.75, `75%` = 29:
## some 'x' values beyond boundary knots may cause ill-conditioned bases
m_spline.met2 <- colMeans(spline.met2)
m_spline.met2
##     rmse      mae 
## 19.53639 14.09720

Regresi Natural Spline

nspl.met2 <- map_dfr(cross.val2$splits,
function(x){
  mod <- lm(horsepower ~ ns(mpg,knots=knots.val.auto),data=autodt[x$in_id,])
  pred <- predict(mod,newdata=autodt[-x$in_id,])
  truth <- autodt[-x$in_id,]$horsepower
  rmse <- mlr3measures::rmse(truth = truth,response = pred)
  mae <- mlr3measures::mae(truth = truth,response = pred)
  metric <- c(rmse,mae)
  names(metric) <- c("rmse","mae")
  return(metric)
  }
)
m_nspl.met2 <- colMeans(nspl.met2)
m_nspl.met2
##     rmse      mae 
## 19.53694 14.16507

Regresi: LOESS

span <- seq(0.1, 1, length.out=91)
best.loess2 <- map_dfr(span, function(i){
  loess.met2 <- map_dfr(cross.val1$splits, function(x){
    mod <- loess(horsepower ~ mpg, span=i, data=autodt[x$in_id,])
    pred <- predict(mod, newdata=autodt[-x$in_id,])
    truth <- autodt[-x$in_id,]$horsepower
    data_eval <- na.omit(data.frame(pred=pred, truth=truth))
    rmse <- mlr3measures::rmse(truth=data_eval$truth, response=data_eval$pred)
    mae <- mlr3measures::mae(truth=data_eval$truth, response=data_eval$pred)
    metric <- c(rmse,mae)
    names(metric) <- c("rmse","mae")
    return(metric)
    }
  )
  head(loess.met2,20)
  #Menghitung rata-rata untuk 10 folds
  m_loess.met2 <- colMeans(loess.met2)
  m_loess.met2
  }
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.7908e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.0291e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6179e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6592e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1236e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.4554e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.6336e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1277e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2038e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0975e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6453e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.0996e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.5701e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.099e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.1005e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 14
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 3.8633e-016
best.loess2 <- cbind(span=span,best.loess2)

best.loess2 %>% slice_min(rmse, n=1)
##   span     rmse      mae
## 1 0.33 19.26358 13.82094
best.loess2 %>% slice_min(mae, n=1)
##   span     rmse    mae
## 1 0.32 19.27623 13.813

Berdasarkan nilai rmse terkecil, best LOESS pada span 0.33, sedangkan best LOESS pada span 0.32 berdasarkan nilai mae terkecil.

Perbandingan Akurasi Model

df.cv2 <- rbind(m_lin.met2, m_pol.met2, best.step2[8,2:3], m_spline.met2, m_nspl.met2, best.loess2[47,-1])
rownames(df.cv2) <- c("Linear", "Polinomial(8)", "Fungsi Tangga(10)", "Spline", "Natural Spline", "Fungsi LOESS(0.32)")
kableExtra::kable(head(df.cv2) ,caption = 'Perbandingan Akurasi Model dengan *Cross Validation Evaluation* (horsepower ~ mpg)')
Perbandingan Akurasi Model dengan Cross Validation Evaluation (horsepower ~ mpg)
rmse mae
Linear 24.07495 18.46422
Polinomial(8) 20.50232 14.37619
Fungsi Tangga(10) 19.94188 14.00930
Spline 19.53639 14.09720
Natural Spline 19.53694 14.16507
Fungsi LOESS(0.32) 19.26656 13.86220

Berdasarkan nilai rmse dan mae terkecil, dengan menggunakan cross-validation evaluation 10 folds, model regresi fungsi LOESS dengan span sebesar 0.32 merupakan model terbaik untuk korelasi model horsepower ~ mpg dibandingkan model lainnya.