1. Seluruh Data Mobil

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
attach(Auto)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  delta.1.poly[i] <- cv.glm(Auto, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Auto, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

prediksi.poly7 = predict( glm(mpg ~ poly(horsepower,7)) , data.frame(horsepower), interval="predict" )
prediksi.poly8 = predict( glm(mpg ~ poly(horsepower,8)) , data.frame(horsepower), interval="predict" )

A.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 7 dan 8")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="blue",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.poly8[ix], 
      main = "Polynomial Regression Ordo 8", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
attach(Auto)
## The following objects are masked from Auto (pos = 3):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
set.seed(1)
delta.1.cut <- rep(NA, 10)
delta.2.cut <- rep(NA, 10)
for (i in 2:10) {
    Auto$horsepower.cut <- cut(Auto$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Auto)
    delta.1.cut[i] <- cv.glm(Auto, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Auto, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

prediksi.cut8 = predict( glm(mpg ~ cut(horsepower,8)) , data.frame(horsepower), interval="predict" )

B.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 8")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.cut8[ix], 
      main = "Piecewise Constant Regression Cut 8", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
attach(Auto)
## The following objects are masked from Auto (pos = 4):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
## The following objects are masked from Auto (pos = 5):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Auto$horsepower.ns <- ns(Auto$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Auto)
    fit <- glm(mpg ~ ns(horsepower, i), data = Auto)
    delta.1.ns[i] <- cv.glm(Auto, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Auto, fit, K = 5)$delta[2]
}
plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)
## [1] 28
which.min(delta.2.ns)
## [1] 29

C.2 Prediksi

prediksi.ns28 = predict( glm(mpg ~ ns(horsepower,28)) , data.frame(horsepower), interval="predict" )
prediksi.ns29 = predict( glm(mpg ~ ns(horsepower,29)) , data.frame(horsepower), interval="predict" )

C.3 Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=28 dan DF=29")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns29[ix], 
      main = "Natural Cubic Spline Regression DF=29", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="blue",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.poly8[ix], 
      main = "Polynomial Regression Ordo 8", col="green",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.cut8[ix], 
      main = "Piecewise Constant Regression Cut 8", col="red",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)
lines(horsepower[ix], prediksi.ns29[ix], 
      main = "Natural Cubic Spline Regression DF=29", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)
## [1] 18.68262
min(delta.1.cut, na.rm=TRUE)
## [1] 20.05522
min(delta.1.ns)
## [1] 18.25245

Delta terkecil, pada Natural Cubic Spline Regression dengan DF = 28.

plot(horsepower, mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Natural Cubic Spline Regression DF=28")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi.ns28[ix], 
      main = "Natural Cubic Spline Regression DF=28", col="black",
      type="l", lwd=2)

2. Mobil dari Masing-Masing Negara

Membagi menjadi Tiap Negara, info di Keterangan Data Origin Origin of car (1. American, 2. European, 3. Japanese)

library(ISLR)
library(boot)
attach(Auto)
## The following objects are masked from Auto (pos = 3):
## 
##     acceleration, cylinders, displacement, horsepower, horsepower.cut,
##     mpg, name, origin, weight, year
## The following objects are masked from Auto (pos = 5):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
## The following objects are masked from Auto (pos = 6):
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
id.A<-Auto$origin==1
Amerika <- Auto[id.A,]

id.E<-Auto$origin==2 
Eropa <- Auto[id.E,]

id.J<-Auto$origin==3
Jepang <- Auto[id.J,]

3. Khusus Mobil Amerika

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Amerika)
  delta.1.poly[i] <- cv.glm(Amerika, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Amerika, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Amerika$prediksi.poly6 = predict( glm(Amerika$mpg ~ poly(Amerika$horsepower,6)) , data.frame(Amerika$horsepower), interval="predict" )
Amerika$prediksi.poly7 = predict( glm(Amerika$mpg ~ poly(Amerika$horsepower,7)) , data.frame(Amerika$horsepower), interval="predict" )

A.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 6 dan 7")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.poly6[ix], 
      main = "Polynomial Regression Ordo 6", col="blue",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 10)
delta.2.cut <- rep(NA, 10)
for (i in 2:10) {
    Amerika$horsepower.cut <- cut(Amerika$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Amerika)
    delta.1.cut[i] <- cv.glm(Amerika, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Amerika, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Amerika$prediksi.cut9 = predict( glm(Amerika$mpg ~ cut(Amerika$horsepower,9)) , data.frame(Amerika$horsepower), interval="predict" )

B.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 9")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.cut9[ix], 
      main = "Piecewise Constant Regression Cut 9", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Amerika$horsepower.ns <- ns(Amerika$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Amerika)
    fit <- glm(mpg ~ ns(horsepower, i), data = Amerika)
    delta.1.ns[i] <- cv.glm(Amerika, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Amerika, fit, K = 5)$delta[2]
}
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)
## [1] 26
which.min(delta.2.ns)
## [1] 19

C.2 Prediksi

Amerika$prediksi.ns26 = predict( glm(Amerika$mpg ~ ns(Amerika$horsepower,26)) , data.frame(Amerika$horsepower), interval="predict" )
Amerika$prediksi.ns19 = predict( glm(Amerika$mpg ~ ns(Amerika$horsepower,19)) , data.frame(Amerika$horsepower), interval="predict" )

C.3 Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=26 dan DF=19")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns19[ix], 
      main = "Natural Cubic Spline Regression DF=19", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.poly6[ix], 
      main = "Polynomial Regression Ordo 6", col="blue",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.poly7[ix], 
      main = "Polynomial Regression Ordo 7", col="green",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.cut9[ix], 
      main = "Piecewise Constant Regression Cut 9", col="red",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)
lines(Amerika$horsepower[ix], Amerika$prediksi.ns19[ix], 
      main = "Natural Cubic Spline Regression DF=19", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)
## [1] 14.77189
min(delta.1.cut, na.rm=TRUE)
## [1] 14.51988
min(delta.1.ns)
## [1] 12.78074

Delta terkecil, pada Natural Cubic Spline Regression dengan DF = 26.

plot(Amerika$horsepower, Amerika$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Natural Cubic Spline Regression DF=26")
ix <- sort(Amerika$horsepower,index.return=T)$ix
lines(Amerika$horsepower[ix], Amerika$prediksi.ns26[ix], 
      main = "Natural Cubic Spline Regression DF=26", col="black",
      type="l", lwd=2)

4. Khusus Mobil Eropa

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Eropa)
  delta.1.poly[i] <- cv.glm(Eropa, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Eropa, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Eropa$prediksi.poly1 = predict( glm(Eropa$mpg ~ poly(Eropa$horsepower,1)) , data.frame(Eropa$horsepower), interval="predict" )
Eropa$prediksi.poly2 = predict( glm(Eropa$mpg ~ poly(Eropa$horsepower,2)) , data.frame(Eropa$horsepower), interval="predict" )

A.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 1 dan 2")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="blue",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.poly2[ix], 
      main = "Polynomial Regression Ordo 2", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 5)
delta.2.cut <- rep(NA, 5)
for (i in 2:5) {
    Eropa$horsepower.cut <- cut(Eropa$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Eropa)
    delta.1.cut[i] <- cv.glm(Eropa, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Eropa, fit, K = 5)$delta[2]
}
plot(1:5, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:5, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Eropa$prediksi.cut5 = predict( glm(Eropa$mpg ~ cut(Eropa$horsepower,5)) , data.frame(Eropa$horsepower), interval="predict" )

B.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 5")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.cut5[ix], 
      main = "Piecewise Constant Regression Cut 5", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 30)
delta.2.ns <- rep(NA, 30)
for (i in 1:30) {
    #Eropa$horsepower.ns <- ns(Eropa$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Eropa)
    fit <- glm(mpg ~ ns(horsepower, i), data = Eropa)
    delta.1.ns[i] <- cv.glm(Eropa, fit, K = 4)$delta[1]
    delta.2.ns[i] <- cv.glm(Eropa, fit, K = 4)$delta[2]
}
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
plot(1:30, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:30, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)
## [1] 2
which.min(delta.2.ns)
## [1] 1

C.2 Prediksi

Eropa$prediksi.ns2 = predict( glm(Eropa$mpg ~ ns(Eropa$horsepower,2)) , data.frame(Eropa$horsepower), interval="predict" )
Eropa$prediksi.ns1 = predict( glm(Eropa$mpg ~ ns(Eropa$horsepower,1)) , data.frame(Eropa$horsepower), interval="predict" )

C.3 Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=2 dan DF=1")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.ns2[ix], 
      main = "Natural Cubic Spline Regression DF=2", col="black",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns1[ix], 
      main = "Natural Cubic Spline Regression DF=1", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="blue",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.poly2[ix], 
      main = "Polynomial Regression Ordo 2", col="green",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.cut5[ix], 
      main = "Piecewise Constant Regression Cut 5", col="red",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns2[ix], 
      main = "Natural Cubic Spline Regression DF=2", col="black",
      type="l", lwd=2)
lines(Eropa$horsepower[ix], Eropa$prediksi.ns1[ix], 
      main = "Natural Cubic Spline Regression DF=1", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)
## [1] 23.42186
min(delta.1.cut, na.rm=TRUE)
## [1] 28.55445
min(delta.1.ns)
## [1] 23.62112

Delta terkecil, pada Polynomial Regression dengan Ordo 1.

plot(Eropa$horsepower, Eropa$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Polynomial Regression Ordo 1")
ix <- sort(Eropa$horsepower,index.return=T)$ix
lines(Eropa$horsepower[ix], Eropa$prediksi.poly1[ix], 
      main = "Polynomial Regression Ordo 1", col="black",
      type="l", lwd=2)

5. Khusus Mobil Jepang

A. Polynomial Regression

A.1 Mencari Derajat Polinomial terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.poly <- rep(NA, 10)
delta.2.poly <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(mpg ~ poly(horsepower, i), data = Jepang)
  delta.1.poly[i] <- cv.glm(Jepang, fit, K=5)$delta[1]
  delta.2.poly[i] <- cv.glm(Jepang, fit, K=5)$delta[2]
}
plot(1:10, delta.1.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.poly), delta.1.poly[which.min(delta.1.poly)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.poly, xlab = "Polynomial Degree", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.poly), delta.2.poly[which.min(delta.2.poly)], col = "red", cex = 2, pch = 20)

A.2 Prediksi

Jepang$prediksi.poly5 = predict( glm(Jepang$mpg ~ poly(Jepang$horsepower,5)) , data.frame(Jepang$horsepower), interval="predict" )
Jepang$prediksi.poly3 = predict( glm(Jepang$mpg ~ poly(Jepang$horsepower,3)) , data.frame(Jepang$horsepower), interval="predict" )

A.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Regresi Polinomial Ordo 5 dan 3")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = "Polynomial Regression Ordo 5", col="blue",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.poly3[ix], 
      main = "Polynomial Regression Ordo 3", col="green",
      type="l", lwd=2)

B. Piecewise Constant Regression

B.1 Mencari Cut terbaik

library(ISLR)
library(boot)
set.seed(1)
delta.1.cut <- rep(NA, 5)
delta.2.cut <- rep(NA, 5)
for (i in 2:5) {
    Jepang$horsepower.cut <- cut(Jepang$horsepower, i)
    fit <- glm(mpg ~ horsepower.cut, data = Jepang)
    delta.1.cut[i] <- cv.glm(Jepang, fit, K = 5)$delta[1]
    delta.2.cut[i] <- cv.glm(Jepang, fit, K = 5)$delta[2]
}
plot(1:5, delta.1.cut, xlab = "Cuts", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.cut), delta.1.cut[which.min(delta.1.cut)], col = "red", cex = 2, pch = 20)

plot(1:5, delta.2.cut, xlab = "Cuts", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.cut), delta.2.cut[which.min(delta.2.cut)], col = "red", cex = 2, pch = 20)

B.2 Prediksi

Jepang$prediksi.cut4 = predict( glm(Jepang$mpg ~ cut(Jepang$horsepower,4)) , data.frame(Jepang$horsepower), interval="predict" )

B.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Piecewise Constant Regression Cut 4")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.cut4[ix], 
      main = "Piecewise Constant Regression Cut 4", col="red",
      type="l", lwd=2)

C. Natural Cubic Spline Regression

C.1 Mencari Jumlah Knot terbaik

library(ISLR)
library(boot)
library(splines)
set.seed(1)
delta.1.ns <- rep(NA, 10)
delta.2.ns <- rep(NA, 10)
for (i in 1:10) {
    #Jepang$horsepower.ns <- ns(Jepang$horsepower, i)
    #fit <- glm(mpg ~ horsepower.ns, data = Jepang)
    fit <- glm(mpg ~ ns(horsepower, i), data = Jepang)
    delta.1.ns[i] <- cv.glm(Jepang, fit, K = 5)$delta[1]
    delta.2.ns[i] <- cv.glm(Jepang, fit, K = 5)$delta[2]
}
plot(1:10, delta.1.ns, xlab = "NS", ylab = "Test MSE - Delta 1", type = "l")
points(which.min(delta.1.ns), delta.1.ns[which.min(delta.1.ns)], col = "red", cex = 2, pch = 20)

plot(1:10, delta.2.ns, xlab = "NS", ylab = "Test MSE - Delta 2", type = "l")
points(which.min(delta.2.ns), delta.2.ns[which.min(delta.2.ns)], col = "red", cex = 2, pch = 20)

which.min(delta.1.ns)
## [1] 8
which.min(delta.2.ns)
## [1] 3

C.2 Prediksi

Jepang$prediksi.ns8 = predict( glm(Jepang$mpg ~ ns(Jepang$horsepower,8)) , data.frame(Jepang$horsepower), interval="predict" )
Jepang$prediksi.ns3 = predict( glm(Jepang$mpg ~ ns(Jepang$horsepower,3)) , data.frame(Jepang$horsepower), interval="predict" )

C.3 Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Natural Cubic Spline Regression DF=8 dan DF=3")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.ns8[ix], 
      main = "Natural Cubic Spline Regression DF=8", col="black",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns3[ix], 
      main = "Natural Cubic Spline Regression DF=3", col="yellow",
      type="l", lwd=2)

D. Gabungan Plot

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Gabungan Plot Yang Terbaik")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = "Polynomial Regression Ordo 5", col="blue",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.poly3[ix], 
      main = "Polynomial Regression Ordo 3", col="green",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.cut4[ix], 
      main = "Piecewise Constant Regression Cut 4", col="red",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns8[ix], 
      main = "Natural Cubic Spline Regression DF=8", col="black",
      type="l", lwd=2)
lines(Jepang$horsepower[ix], Jepang$prediksi.ns3[ix], 
      main = "Natural Cubic Spline Regression DF=3", col="yellow",
      type="l", lwd=2)

E. Delta terkecil

min(delta.1.poly)
## [1] 17.29743
min(delta.1.cut, na.rm=TRUE)
## [1] 19.75088
min(delta.1.ns)
## [1] 18.8119

Delta terkecil, pada Polynomiale Regression dengan Ordo 5.

plot(Jepang$horsepower, Jepang$mpg, pch=19, col="coral",
     xlab="Horse Power", ylab="Mile per Gallon",
     main="Model Terbaik Polynomial Regression Ordo 5")
ix <- sort(Jepang$horsepower,index.return=T)$ix
lines(Jepang$horsepower[ix], Jepang$prediksi.poly5[ix], 
      main = " Polynomial Regression Ordo 56", col="black",
      type="l", lwd=2)

6. Plot 3 Negara Menggunakan ggplot2

A. Model Terbaik Amerika

Amerikana<-data.frame(Amerika$prediksi.ns26,Amerika$horsepower)

B. Model Terbaik Eropa

Eropana<-data.frame(Eropa$prediksi.poly1,Eropa$horsepower)

C. Model Terbaik Jepang

Jepangana <- data.frame(Jepang$prediksi.poly5,Jepang$horsepower)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'Auto':
## 
##     mpg
ggplot(Auto, aes(x=horsepower, y=mpg, color=as.factor(origin))) +
  geom_point()+
  labs(
    x = "Horsepower",
    y = "Mile per Gallon",
    color = "Negara Produsen",
    title = "Plot Hubungan antara Mile per Gallon dan Horsepower",
    subtitle = "Dipisahkan berdasalkan Asal Negara",
    caption = "Abdul Aziz N"
  ) +
  theme_minimal()+
  geom_line(color='red',data = Amerikana, aes(x=Amerika.horsepower, y=Amerika.prediksi.ns26))+
  geom_line(color='green',data = Eropana, aes(x=Eropa.horsepower, y=Eropa.prediksi.poly1))+
  geom_line(color='blue',data = Jepangana, aes(x=Jepang.horsepower, y=Jepang.prediksi.poly5))