1. Seluruh Data Mobil

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
attach(Auto)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  delta.1.poly[i] <- cv.glm(Auto, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Auto, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

prediksi.poly7 = predict( glm(mpg ~ poly(horsepower,7)) , data.frame(horsepower), interval="predict" )
prediksi.poly8 = predict( glm(mpg ~ poly(horsepower,8)) , data.frame(horsepower), interval="predict" )

A.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 7 dan 8")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="blue",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.poly8[ix], 
      main = "Polynomial Regression Ordo 8", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
attach(Auto)

## The following objects are masked from Auto (pos = 3):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year

set.seed(1)
delta.1.cut <- rep(NA, 10)
delta.2.cut <- rep(NA, 10)
for (i in 2:10) {
    Auto$horsepower.cut <- cut(Auto$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Auto)
    delta.1.cut[i] <- cv.glm(Auto, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Auto, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

prediksi.cut8 = predict( glm(mpg ~ cut(horsepower,8)) , data.frame(horsepower), interval="predict" )

B.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 8")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.cut8[ix], 
      main = "Piecewise Constant Regression Cut 8", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
attach(Auto)

## The following objects are masked from Auto (pos = 4):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year

## The following objects are masked from Auto (pos = 5):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year

set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Auto$horsepower.ns <- ns(Auto$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Auto)
    fit <- glm(mpg ~ ns(horsepower, i), data = Auto)
    delta.1.ns[i] <- cv.glm(Auto, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Auto, fit, K = 5)$delta[2]
}
plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)

## [1] 28

which.min(delta.2.ns)

## [1] 29

C.2 Prediksi

prediksi.ns28 = predict( glm(mpg ~ ns(horsepower,28)) , data.frame(horsepower), interval="predict" )
prediksi.ns29 = predict( glm(mpg ~ ns(horsepower,29)) , data.frame(horsepower), interval="predict" )

C.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=28 dan DF=29")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns29[ix], 
      main = "Natural Cubic Spline Regression DF=29", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="blue",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.poly8[ix], 
      main = "Polynomial Regression Ordo 8", col="green",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.cut8[ix], 
      main = "Piecewise Constant Regression Cut 8", col="red",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns29[ix], 
      main = "Natural Cubic Spline Regression DF=29", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)

## [1] 18.68262

min(delta.1.cut, na.rm=TRUE)

## [1] 20.05522

min(delta.1.ns)

## [1] 18.25245

Delta terkecil, pada Natural Cubic Spline Regression dengan DF = 28.

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Natural Cubic Spline Regression DF=28")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)

2. Mobil dari Masing-Masing Negara

Membagi menjadi Tiap Negara, info di Keterangan Data Origin Origin of car (1. American, 2. European, 3. Japanese)

library(ISLR)
library(boot)
attach(Auto)

## The following objects are masked from Auto (pos = 3):
## 
##     acceleration, cylinders, displacement, horsepower, horsepower.cut,
##     mpg, name, origin, weight, year

## The following objects are masked from Auto (pos = 5):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year

## The following objects are masked from Auto (pos = 6):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year

id.A<-Auto$origin==1
Amerika <- Auto[id.A,]

id.E<-Auto$origin==2 
Eropa <- Auto[id.E,]

id.J<-Auto$origin==3
Jepang <- Auto[id.J,]

3. Khusus Mobil Amerika

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Amerika)
  delta.1.poly[i] <- cv.glm(Amerika, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Amerika, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Amerika$prediksi.poly6 = predict( glm(Amerika$mpg ~ poly(Amerika$horsepower,6)) , data.frame(Amerika$horsepower), interval="predict" )
Amerika$prediksi.poly7 = predict( glm(Amerika$mpg ~ poly(Amerika$horsepower,7)) , data.frame(Amerika$horsepower), interval="predict" )

A.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 6 dan 7")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.poly6[ix], 
      main = "Polynomial Regression Ordo 6", col="blue",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 10)
delta.2.cut <- rep(NA, 10)
for (i in 2:10) {
    Amerika$horsepower.cut <- cut(Amerika$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Amerika)
    delta.1.cut[i] <- cv.glm(Amerika, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Amerika, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Amerika$prediksi.cut9 = predict( glm(Amerika$mpg ~ cut(Amerika$horsepower,9)) , data.frame(Amerika$horsepower), interval="predict" )

B.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 9")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.cut9[ix], 
      main = "Piecewise Constant Regression Cut 9", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Amerika$horsepower.ns <- ns(Amerika$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Amerika)
    fit <- glm(mpg ~ ns(horsepower, i), data = Amerika)
    delta.1.ns[i] <- cv.glm(Amerika, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Amerika, fit, K = 5)$delta[2]
}

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)

## [1] 26

which.min(delta.2.ns)

## [1] 19

C.2 Prediksi

Amerika$prediksi.ns26 = predict( glm(Amerika$mpg ~ ns(Amerika$horsepower,26)) , data.frame(Amerika$horsepower), interval="predict" )
Amerika$prediksi.ns19 = predict( glm(Amerika$mpg ~ ns(Amerika$horsepower,19)) , data.frame(Amerika$horsepower), interval="predict" )

C.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=26 dan DF=19")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns19[ix], 
      main = "Natural Cubic Spline Regression DF=19", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.poly6[ix], 
      main = "Polynomial Regression Ordo 6", col="blue",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="green",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.cut9[ix], 
      main = "Piecewise Constant Regression Cut 9", col="red",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns19[ix], 
      main = "Natural Cubic Spline Regression DF=19", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)

## [1] 14.77189

min(delta.1.cut, na.rm=TRUE)

## [1] 14.51988

min(delta.1.ns)

## [1] 12.78074

Delta terkecil, pada Natural Cubic Spline Regression dengan DF = 26.

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Natural Cubic Spline Regression DF=26")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)

4. Khusus Mobil Eropa

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Eropa)
  delta.1.poly[i] <- cv.glm(Eropa, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Eropa, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Eropa$prediksi.poly1 = predict( glm(Eropa$mpg ~ poly(Eropa$horsepower,1)) , data.frame(Eropa$horsepower), interval="predict" )
Eropa$prediksi.poly2 = predict( glm(Eropa$mpg ~ poly(Eropa$horsepower,2)) , data.frame(Eropa$horsepower), interval="predict" )

A.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 1 dan 2")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="blue",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.poly2[ix], 
      main = "Polynomial Regression Ordo 2", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 5)
delta.2.cut <- rep(NA, 5)
for (i in 2:5) {
    Eropa$horsepower.cut <- cut(Eropa$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Eropa)
    delta.1.cut[i] <- cv.glm(Eropa, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Eropa, fit, K = 5)$delta[2]
}
plot(1:5, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:5, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Eropa$prediksi.cut5 = predict( glm(Eropa$mpg ~ cut(Eropa$horsepower,5)) , data.frame(Eropa$horsepower), interval="predict" )

B.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 5")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.cut5[ix], 
      main = "Piecewise Constant Regression Cut 5", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Eropa$horsepower.ns <- ns(Eropa$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Eropa)
    fit <- glm(mpg ~ ns(horsepower, i), data = Eropa)
    delta.1.ns[i] <- cv.glm(Eropa, fit, K = 4)$delta[1]
    delta.2.ns[i] <- cv.glm(Eropa, fit, K = 4)$delta[2]
}

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)

## [1] 2

which.min(delta.2.ns)

## [1] 1

C.2 Prediksi

Eropa$prediksi.ns2 = predict( glm(Eropa$mpg ~ ns(Eropa$horsepower,2)) , data.frame(Eropa$horsepower), interval="predict" )
Eropa$prediksi.ns1 = predict( glm(Eropa$mpg ~ ns(Eropa$horsepower,1)) , data.frame(Eropa$horsepower), interval="predict" )

C.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=2 dan DF=1")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.ns2[ix], 
      main = "Natural Cubic Spline Regression DF=2", col="black",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns1[ix], 
      main = "Natural Cubic Spline Regression DF=1", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="blue",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.poly2[ix], 
      main = "Polynomial Regression Ordo 2", col="green",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.cut5[ix], 
      main = "Piecewise Constant Regression Cut 5", col="red",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns2[ix], 
      main = "Natural Cubic Spline Regression DF=2", col="black",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns1[ix], 
      main = "Natural Cubic Spline Regression DF=1", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)

## [1] 23.42186

min(delta.1.cut, na.rm=TRUE)

## [1] 28.55445

min(delta.1.ns)

## [1] 23.62112

Delta terkecil, pada Polynomial Regression dengan Ordo 1.

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Polynomial Regression Ordo 1")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="black",
      type="l", lwd=2)

5. Khusus Mobil Jepang

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Jepang)
  delta.1.poly[i] <- cv.glm(Jepang, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Jepang, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Jepang$prediksi.poly5 = predict( glm(Jepang$mpg ~ poly(Jepang$horsepower,5)) , data.frame(Jepang$horsepower), interval="predict" )
Jepang$prediksi.poly3 = predict( glm(Jepang$mpg ~ poly(Jepang$horsepower,3)) , data.frame(Jepang$horsepower), interval="predict" )

A.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 5 dan 3")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = "Polynomial Regression Ordo 5", col="blue",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.poly3[ix], 
      main = "Polynomial Regression Ordo 3", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 5)
delta.2.cut <- rep(NA, 5)
for (i in 2:5) {
    Jepang$horsepower.cut <- cut(Jepang$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Jepang)
    delta.1.cut[i] <- cv.glm(Jepang, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Jepang, fit, K = 5)$delta[2]
}
plot(1:5, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:5, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Jepang$prediksi.cut4 = predict( glm(Jepang$mpg ~ cut(Jepang$horsepower,4)) , data.frame(Jepang$horsepower), interval="predict" )

B.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 4")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.cut4[ix], 
      main = "Piecewise Constant Regression Cut 4", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 10)
delta.2.ns <- rep(NA, 10)
for (i in 1:10) {
    #Jepang$horsepower.ns <- ns(Jepang$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Jepang)
    fit <- glm(mpg ~ ns(horsepower, i), data = Jepang)
    delta.1.ns[i] <- cv.glm(Jepang, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Jepang, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)

## [1] 8

which.min(delta.2.ns)

## [1] 3

C.2 Prediksi

Jepang$prediksi.ns8 = predict( glm(Jepang$mpg ~ ns(Jepang$horsepower,8)) , data.frame(Jepang$horsepower), interval="predict" )
Jepang$prediksi.ns3 = predict( glm(Jepang$mpg ~ ns(Jepang$horsepower,3)) , data.frame(Jepang$horsepower), interval="predict" )

C.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=8 dan DF=3")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.ns8[ix], 
      main = "Natural Cubic Spline Regression DF=8", col="black",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns3[ix], 
      main = "Natural Cubic Spline Regression DF=3", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = "Polynomial Regression Ordo 5", col="blue",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.poly3[ix], 
      main = "Polynomial Regression Ordo 3", col="green",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.cut4[ix], 
      main = "Piecewise Constant Regression Cut 4", col="red",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns8[ix], 
      main = "Natural Cubic Spline Regression DF=8", col="black",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns3[ix], 
      main = "Natural Cubic Spline Regression DF=3", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)

## [1] 17.29743

min(delta.1.cut, na.rm=TRUE)

## [1] 19.75088

min(delta.1.ns)

## [1] 18.8119

Delta terkecil, pada Polynomiale Regression dengan Ordo 5.

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Polynomial Regression Ordo 5")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = " Polynomial Regression Ordo 56", col="black",
      type="l", lwd=2)

6. Plot 3 Negara Menggunakan ggplot2

A. Model Terbaik Amerika

Amerikana<-data.frame(Amerika$prediksi.ns26,Amerika$horsepower)

B. Model Terbaik Eropa

Eropana<-data.frame(Eropa$prediksi.poly1,Eropa$horsepower)

C. Model Terbaik Jepang

Jepangana <- data.frame(Jepang$prediksi.poly5,Jepang$horsepower)

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'Auto':
## 
##     mpg

ggplot(Auto, aes(x=horsepower, y=mpg, color=as.factor(origin))) +
  geom_point()+
  labs(
    x = "Horsepower",
    y = "Mile per Gallon",
    color = "Negara Produsen",
    title = "Plot Hubungan antara Mile per Gallon dan Horsepower",
    subtitle = "Dipisahkan berdasalkan Asal Negara",
    caption = "Abdul Aziz N"
  ) +
  theme_minimal()+
  geom_line(color='red',data = Amerikana, aes(x=Amerika.horsepower, y=Amerika.prediksi.ns26))+
  geom_line(color='green',data = Eropana, aes(x=Eropa.horsepower, y=Eropa.prediksi.poly1))+
  geom_line(color='blue',data = Jepangana, aes(x=Jepang.horsepower, y=Jepang.prediksi.poly5))

08-STA581-Regresi Non Linear

Abdul Aziz Nurussadad

11/13/2020

1. Seluruh Data Mobil

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

A.2 Prediksi

A.3 Plot

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

B.2 Prediksi

B.3 Plot

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

C.2 Prediksi

C.3 Plot

D. Gabungan Plot

E. Delta terkecil

2. Mobil dari Masing-Masing Negara

3. Khusus Mobil Amerika

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

A.2 Prediksi

A.3 Plot

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

B.2 Prediksi

B.3 Plot

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

C.2 Prediksi

C.3 Plot

D. Gabungan Plot

E. Delta terkecil

4. Khusus Mobil Eropa

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

A.2 Prediksi

A.3 Plot

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

B.2 Prediksi

B.3 Plot

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

C.2 Prediksi

C.3 Plot

D. Gabungan Plot

E. Delta terkecil

5. Khusus Mobil Jepang

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

A.2 Prediksi

A.3 Plot

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

B.2 Prediksi

B.3 Plot

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

C.2 Prediksi

C.3 Plot

D. Gabungan Plot

E. Delta terkecil

6. Plot 3 Negara Menggunakan ggplot2

A. Model Terbaik Amerika

B. Model Terbaik Eropa

C. Model Terbaik Jepang