library yang digunakan
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(splines)
library(rsample)
## Warning: package 'rsample' was built under R version 4.0.5
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
head(cbind(mpg, horsepower))
## mpg horsepower
## [1,] 18 130
## [2,] 15 165
## [3,] 18 150
## [4,] 16 150
## [5,] 17 140
## [6,] 15 198
plot(horsepower, mpg, pch=19, col="cadetblue",
xlab="Horse Power", ylab="mile per gallon")
set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")
d<-1:10
best_poly<-map_dfr(d, function(i){
metric_poly <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ poly(horsepower, d = i),
data=Auto[x$in_id,])
pred <- predict(mod,newdata=Auto[-x$in_id,])
truth <- Auto[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
## d rmse mae
## 1 1 4.908935 3.858832
## 2 2 4.352806 3.261129
## 3 3 4.356807 3.264025
## 4 4 4.354783 3.274987
## 5 5 4.307652 3.238723
## 6 6 4.301354 3.241417
## 7 7 4.287438 3.222540
## 8 8 4.302908 3.240966
## 9 9 4.321277 3.247392
## 10 10 4.336836 3.257891
polinom = lm(mpg ~ poly(horsepower,7), data=Auto)
prediksi = predict( polinom , data.frame(Auto), interval="predict" )
plot(horsepower, mpg, pch=19, col="cadetblue1",
xlab="Horse Power", ylab="mile per gallon",
main="Regresi Polinomial Ordo 4")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi[ix],
main = "Polynomial Regression", col="red",
type="l", lwd=2)
set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")
breaks <- 2:10
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- Auto[x$in_id,]
training$horsepower <- cut(training$horsepower,i)
mod <- lm(mpg ~ horsepower,
data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- Auto[-x$in_id,]
horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,
newdata=list(horsepower=horsepower_new))
truth <- testing$mpg
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
## d rmse mae
## 1 2 6.122422 4.776980
## 2 3 5.766535 4.520709
## 3 4 5.020265 3.805185
## 4 5 4.706711 3.572772
## 5 6 4.658138 3.539010
## 6 7 4.572175 3.405770
## 7 8 4.438716 3.398562
## 8 9 4.498963 3.435017
## 9 10 4.569136 3.437797
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="cadetblue4") +
stat_smooth(method = "lm",
formula = y~cut(x,8),
lty = 1, col = "red",se = F)+
theme_bw()
set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")
df <- 3:12
best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
data=Auto[x$in_id,])
pred <- predict(mod,
newdata=Auto[-x$in_id,])
truth <- Auto[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,
response = pred
)
mae <- mlr3measures::mae(truth = truth,
response = pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_spline3
# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)
mean_metric_spline3
}
)
best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
## d rmse mae
## 1 3 4.342637 3.259033
## 2 4 4.312147 3.242972
## 3 5 4.300381 3.235026
## 4 6 4.298908 3.229285
## 5 7 4.275443 3.217522
## 6 8 4.303139 3.250454
## 7 9 4.293479 3.231677
## 8 10 4.277135 3.199777
## 9 11 4.341622 3.255280
## 10 12 4.314328 3.216725
attr(ns(Auto$horsepower, df=10),"knots")
## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 67.0 72.0 80.0 88.0 93.5 100.0 110.0 140.0 157.7
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="steelblue") +
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(67.0, 72.0, 80.0, 88.0, 93.5, 100.0, 110.0, 140.0, 157.7 )),
lty = 1,se = F, col="red")
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
best_tangga %>% select(-1) %>% slice_min(mae),
best_spline3 %>% select(-1)%>% slice_min(mae)
)
nama_model <- c("Polynomial7","Piecewise constant(breaks=8)",
"Natural cubic spline(df=10)"
)
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Polynomial7 4.287438 3.222540
## 2 Piecewise constant(breaks=8) 4.438716 3.398562
## 3 Natural cubic spline(df=10) 4.277135 3.199777
fungsi regresi terbaik yaitu spline dengan df=12
id.A<-Auto$origin==1
Amerika<-Auto[id.A,]
plot(Amerika$horsepower, Amerika$mpg, pch=19, col="aquamarine4",
xlab="Horse Power", ylab="mile per gallon")
set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")
d<-1:10
best_poly<-map_dfr(d, function(i){
metric_poly <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ poly(horsepower, d = i),
data=Amerika[x$in_id,])
pred <- predict(mod,newdata=Amerika[-x$in_id,])
truth <- Amerika[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
## d rmse mae
## 1 1 4.268396 3.361861
## 2 2 3.806887 2.894027
## 3 3 3.824819 2.908566
## 4 4 3.835479 2.901702
## 5 5 3.806815 2.895982
## 6 6 3.800663 2.889671
## 7 7 3.793874 2.863246
## 8 8 3.806333 2.870046
## 9 9 3.819989 2.879042
## 10 10 4.155978 3.010712
set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")
breaks <- 2:11
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- Amerika[x$in_id,]
training$horsepower <- cut(training$horsepower,i)
mod <- lm(mpg ~ horsepower,
data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- Amerika[-x$in_id,]
horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,
newdata=list(horsepower=horsepower_new))
truth <- testing$mpg
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
## d rmse mae
## 1 2 4.984048 3.826708
## 2 3 4.685397 3.651166
## 3 4 4.162464 3.158257
## 4 5 4.014042 3.098955
## 5 6 4.234363 3.198962
## 6 7 4.091327 3.149676
## 7 8 3.903648 2.955931
## 8 9 3.744940 2.820420
## 9 10 3.954484 2.993423
## 10 11 3.901662 2.936554
set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")
df <- 2:12
best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
data=Amerika[x$in_id,])
pred <- predict(mod,
newdata=Amerika[-x$in_id,])
truth <- Amerika[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,
response = pred
)
mae <- mlr3measures::mae(truth = truth,
response = pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_spline3
# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)
mean_metric_spline3
}
)
best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
## d rmse mae
## 1 2 3.793072 2.884487
## 2 3 3.816322 2.901572
## 3 4 3.831471 2.911463
## 4 5 3.788643 2.846129
## 5 6 3.810135 2.874073
## 6 7 3.787742 2.831774
## 7 8 3.784609 2.824976
## 8 9 3.790436 2.839366
## 9 10 3.780952 2.820648
## 10 11 3.804090 2.849770
## 11 12 3.816358 2.839208
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
best_tangga %>% select(-1) %>% slice_min(mae),
best_spline3 %>% select(-1)%>% slice_min(mae)
)
nama_model <- c("Polynomial7","Piecewise constant(breaks=9)",
"Natural cubic spline(df=10)"
)
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Polynomial7 3.793874 2.863246
## 2 Piecewise constant(breaks=9) 3.744940 2.820420
## 3 Natural cubic spline(df=10) 3.780952 2.820648
model terbaik bagi data Amerika adalah fungsi tangga dengan breaks=9
id.E<-Auto$origin==2
Eropa<-Auto[id.E,]
plot(Eropa$horsepower, Eropa$mpg, pch=19, col="aquamarine4",
xlab="Horse Power", ylab="mile per gallon")
set.seed(1234)
cross_val <- vfold_cv(Eropa,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
d<-1:10
best_poly<-map_dfr(d, function(i){
metric_poly <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ poly(horsepower, d = i),
data=Eropa[x$in_id,])
pred <- predict(mod,newdata=Eropa[-x$in_id,])
truth <- Eropa[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
## d rmse mae
## 1 1 4.748730 3.809553
## 2 2 4.825216 3.870409
## 3 3 4.890907 3.933474
## 4 4 4.951956 4.035466
## 5 5 5.112665 4.153303
## 6 6 6.750648 4.942009
## 7 7 7.098575 5.014004
## 8 8 6.338606 4.610701
## 9 9 16.228101 8.698614
## 10 10 10.059234 6.019488
set.seed(1234)
cross_val <- vfold_cv(Eropa,v=9,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
breaks <- 2:9
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- Eropa[x$in_id,]
training$horsepower <- cut(training$horsepower,i)
mod <- lm(mpg ~ horsepower,
data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- Eropa[-x$in_id,]
horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,
newdata=list(horsepower=horsepower_new))
truth <- testing$mpg
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
## d rmse mae
## 1 2 5.371208 4.250614
## 2 3 5.578173 4.533519
## 3 4 5.144853 4.183493
## 4 5 5.087614 4.003546
## 5 6 5.236109 4.136374
## 6 7 5.102072 3.975827
## 7 8 5.156002 4.146550
## 8 9 5.374641 4.286823
set.seed(1234)
cross_val <- vfold_cv(Eropa,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
df <- 2:12
best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
data=Eropa[x$in_id,])
pred <- predict(mod,
newdata=Eropa[-x$in_id,])
truth <- Eropa[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,
response = pred
)
mae <- mlr3measures::mae(truth = truth,
response = pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_spline3
# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)
mean_metric_spline3
}
)
best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
## d rmse mae
## 1 2 4.824097 3.870036
## 2 3 4.893425 3.935614
## 3 4 4.922407 3.953072
## 4 5 5.007375 4.017736
## 5 6 5.047184 3.998508
## 6 7 5.177158 4.099570
## 7 8 5.245169 4.180753
## 8 9 5.378872 4.295886
## 9 10 5.462338 4.369747
## 10 11 5.557164 4.404541
## 11 12 5.657355 4.486990
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
best_tangga %>% select(-1) %>% slice_min(mae),
best_spline3 %>% select(-1)%>% slice_min(mae)
)
nama_model <- c("Polynomial1","Piecewise constant(breaks=7)",
"Natural cubic spline(df=2)"
)
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Polynomial1 4.748730 3.809553
## 2 Piecewise constant(breaks=7) 5.102072 3.975827
## 3 Natural cubic spline(df=2) 4.824097 3.870036
model terbaik bagi data Amerika adalah polinom dengan ordo 1
id.J<-Auto$origin==3
Jepang<-Auto[id.J,]
plot(Jepang$horsepower, Jepang$mpg, pch=19, col="aquamarine4",
xlab="Horse Power", ylab="mile per gallon")
set.seed(1234)
cross_val <- vfold_cv(Jepang,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
d<-1:10
best_poly<-map_dfr(d, function(i){
metric_poly <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ poly(horsepower, d = i),
data=Jepang[x$in_id,])
pred <- predict(mod,newdata=Jepang[-x$in_id,])
truth <- Jepang[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
## d rmse mae
## 1 1 4.379588 3.511243
## 2 2 4.407709 3.514377
## 3 3 3.891911 2.990732
## 4 4 4.507924 3.295015
## 5 5 3.933976 3.050013
## 6 6 4.189597 3.219087
## 7 7 7.020382 4.241221
## 8 8 14.692013 6.826629
## 9 9 18.866965 8.179589
## 10 10 15.366406 6.988463
set.seed(1234)
cross_val <- vfold_cv(Jepang,v=5,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
breaks <- 2:5
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- Jepang[x$in_id,]
training$horsepower <- cut(training$horsepower,i)
mod <- lm(mpg ~ horsepower,
data=training)
labs_x <- levels(mod$model[,2])
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- Jepang[-x$in_id,]
horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,
newdata=list(horsepower=horsepower_new))
truth <- testing$mpg
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
## d rmse mae
## 1 2 4.657624 3.419243
## 2 3 4.276138 3.306548
## 3 4 4.236296 3.341741
## 4 5 4.310206 3.423978
set.seed(1234)
cross_val <- vfold_cv(Jepang,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
df <- 2:12
best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
data=Jepang[x$in_id,])
pred <- predict(mod,
newdata=Jepang[-x$in_id,])
truth <- Jepang[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,
response = pred
)
mae <- mlr3measures::mae(truth = truth,
response = pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_spline3
# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)
mean_metric_spline3
}
)
best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
## d rmse mae
## 1 2 4.420154 3.531408
## 2 3 4.075356 3.131341
## 3 4 4.143401 3.186480
## 4 5 4.114731 3.094869
## 5 6 4.219534 3.191374
## 6 7 4.274199 3.246116
## 7 8 4.341063 3.307820
## 8 9 4.340789 3.318587
## 9 10 4.471304 3.385200
## 10 11 4.515272 3.443309
## 11 12 4.599257 3.540302
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
best_tangga %>% select(-1) %>% slice_min(mae),
best_spline3 %>% select(-1)%>% slice_min(mae)
)
nama_model <- c("Polynomial3","Piecewise constant(breaks=3)",
"Natural cubic spline(df=5)"
)
data.frame(nama_model,nilai_metric)
## nama_model rmse mae
## 1 Polynomial3 3.891911 2.990732
## 2 Piecewise constant(breaks=3) 4.276138 3.306548
## 3 Natural cubic spline(df=5) 4.114731 3.094869
model terbaik bagi data jepang adalah polinomial ordo 3
par(mfrow=c(1,3))
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="aquamarine4") +
stat_smooth(method = "lm",
formula = y~cut(x,9),
lty = 1, col = "red",se = F)+
theme_bw()
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="aquamarine4") +
stat_smooth(method = "lm",
formula = y~poly(x,1,raw=T),
lty = 1, col = "red",se = F)+
theme_bw()
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="aquamarine4") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "red",se = F)+
theme_bw()