# A tibble: 3 × 6
origin count mean_mpg sd_mpg min_mpg max_mpg
<chr> <int> <dbl> <dbl> <dbl> <dbl>
1 Amerika 245 20.0 6.44 9 39
2 Eropa 68 27.6 6.58 16.2 44.3
3 Jepang 79 30.5 6.09 18 46.6
# Distribution of mpg within each origin: a box plot with the raw
# observations overlaid as jittered points.
ggplot(Auto, aes(x = origin, y = mpg, fill = origin)) +
  # Soft, semi-transparent boxes. Outlier glyphs are suppressed because the
  # jitter layer below already draws every observation, outliers included.
  geom_boxplot(alpha = 0.6, color = "black", outlier.shape = NA) +
  # Raw observations. Jitter horizontally only (height = 0) so that the
  # plotted mpg values are not perturbed.
  geom_jitter(
    width = 0.2, height = 0,
    alpha = 0.7, color = "black", size = 1.5
  ) +
  labs(title = "Sebaran mpg per origin", x = "", y = "mpg") +
  # Soft pastel fill colours.
  scale_fill_brewer(palette = "Pastel1") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(size = 11, face = "bold")
  )
# Distribution of horsepower within each origin: a box plot with the raw
# observations overlaid as jittered points.
ggplot(Auto, aes(x = origin, y = horsepower, fill = origin)) +
  # Soft, semi-transparent boxes. Outlier glyphs are suppressed because the
  # jitter layer below already draws every observation, outliers included.
  geom_boxplot(alpha = 0.6, color = "black", outlier.shape = NA) +
  # Raw observations. Jitter horizontally only (height = 0) so that the
  # plotted horsepower values are not perturbed.
  geom_jitter(
    width = 0.2, height = 0,
    alpha = 0.7, color = "black", size = 1.5
  ) +
  # The y axis shows horsepower, so it is labelled accordingly.
  labs(title = "Sebaran horsepower per origin", x = "", y = "horsepower") +
  # Soft pastel fill colours.
  scale_fill_brewer(palette = "Pastel1") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(size = 11, face = "bold")
  )
# Scatter plot of mpg against horsepower, one panel per origin, with the
# points coloured by origin.
ggplot(
  data = Auto,
  mapping = aes(x = horsepower, y = mpg, colour = origin)
) +
  geom_point(alpha = 0.7) +
  facet_wrap(vars(origin)) +
  labs(
    title = "Scatter Plot MPG vs Horsepower per Origin",
    x = "Horsepower",
    y = "MPG"
  ) +
  theme_minimal() +
  theme(
    # Centred bold title and bold panel headers.
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    strip.text = element_text(face = "bold", size = 12)
  )
Fungsi Tangga (Piecewise Constant)
# Step-function (piecewise-constant) regression of mpg on horsepower,
# fitted separately for each origin, followed by a faceted plot of the fits.

# Make sure origin is a factor with readable labels.
Auto$origin <- factor(Auto$origin, labels = c("Amerika", "Eropa", "Jepang"))

# Bin horsepower into 12 equal-width intervals. The binning is done on the
# full data set, so every origin shares the same break points.
Auto <- Auto %>%
  mutate(hp_cut = cut(horsepower, 12))

# Fit one model per origin and attach its predictions to that origin's rows.
models <- Auto %>%
  group_by(origin) %>%
  group_modify(~ {
    mod <- lm(mpg ~ hp_cut, data = .x)
    # Fitted value plus prediction-interval bounds for the observed rows.
    prd <- as.data.frame(predict(mod, interval = "prediction"))
    # Rename so the columns are consistent across sections.
    colnames(prd) <- c("pred", "lwr", "upr")
    bind_cols(.x, prd)
  }) %>%
  # Drop the grouping so later verbs do not silently operate per origin.
  ungroup()

# Plot: observations plus the fitted step function, one panel per origin.
ggplot(models, aes(x = horsepower, y = mpg)) +
  geom_point(color = "coral", alpha = 0.7) +
  geom_line(aes(y = pred), color = "blue", linewidth = 1.3) +
  labs(
    title = "Step Function mpg ~ horsepower per Origin",
    x = "Horsepower",
    y = "MPG"
  ) +
  facet_wrap(~ origin) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    strip.text = element_text(size = 12, face = "bold")
  )
# Step-function fits for all origins overlaid in a single panel; points and
# fitted lines are coloured by origin.
ggplot(
  data = models,
  mapping = aes(x = horsepower, y = mpg, colour = origin)
) +
  geom_point(alpha = 0.6) +
  # Fitted values taken from the `pred` column of `models`.
  geom_line(mapping = aes(y = pred), linewidth = 1.3) +
  labs(
    title = "Step Function mpg ~ horsepower per Origin",
    x = "Horsepower",
    y = "MPG",
    color = "Origin"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )
Regresi Polinomial
# Degree-4 polynomial regression of mpg on horsepower, fitted separately for
# each origin, followed by a faceted plot of the fits.

# Make sure origin is a factor with readable labels.
Auto$origin <- factor(Auto$origin, labels = c("US", "Europe", "Japan"))

# ---- Fit a degree-4 polynomial per origin and build predictions ----
models_poly <- Auto %>%
  group_by(origin) %>%
  group_modify(~ {
    # Orthogonal polynomial of degree 4 in horsepower.
    mod <- lm(mpg ~ poly(horsepower, 4), data = .x)

    # Predict at the observed horsepower values of this origin.
    prd <- predict(
      mod,
      newdata = data.frame(horsepower = .x$horsepower),
      interval = "prediction"
    )
    prd <- as.data.frame(prd)
    colnames(prd) <- c("pred", "lwr", "upr")

    # No explicit sort is needed: geom_line() connects observations in
    # order of x, so the fitted curve is drawn smoothly regardless of row
    # order. (A sort-index column attached to unsorted rows would be
    # misaligned, so none is added.)
    bind_cols(.x, prd)
  }) %>%
  # Drop the grouping so later verbs do not silently operate per origin.
  ungroup()

# ---- Plot ----
ggplot(models_poly, aes(x = horsepower, y = mpg)) +
  geom_point(color = "coral", alpha = 0.7) +
  geom_line(aes(y = pred), color = "blue", linewidth = 1.3) +
  labs(
    title = "Regresi Polinomial Ordo 4 per Origin",
    x = "Horsepower",
    y = "MPG"
  ) +
  facet_wrap(~ origin) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    strip.text = element_text(size = 12, face = "bold")
  )
# ---- All origins in a single plot ----
# Degree-4 polynomial fits overlaid in one panel; points and fitted curves
# are coloured by origin.
ggplot(
  data = models_poly,
  mapping = aes(x = horsepower, y = mpg, colour = origin)
) +
  geom_point(alpha = 0.6) +
  # Fitted values taken from the `pred` column of `models_poly`.
  geom_line(mapping = aes(y = pred), linewidth = 1.3) +
  labs(
    title = "Regresi Polinomial Orde 4 (Dalam 1 Grafik)",
    x = "Horsepower",
    y = "MPG",
    color = "Origin"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )
Regresi Spline
library(splines)
library(dplyr)

# Data
df <- Auto # or your own data set
df$origin <- factor(df$origin)

# ----------------------------
# 1. FUNCTION: choose the best knot set for one origin
# ----------------------------

#' Select the cubic B-spline fit of mpg on horsepower with the lowest AIC.
#'
#' Four candidate models are compared: a cubic B-spline with no interior
#' knot, and cubic B-splines whose interior knots are the first one, two or
#' three of the horsepower quartiles (25%, 50%, 75%).
#'
#' @param data_origin Data frame with numeric columns `horsepower` and `mpg`.
#' @return A list with `model` (the selected `lm` fit; its predictor is named
#'   `hp`, so `predict()` needs `newdata` with an `hp` column) and `knot`
#'   (numeric vector of the interior knots used, or `NA` when the selected
#'   model has no interior knot).
pilih_knot_optimal <- function(data_origin) {
  hp <- data_origin$horsepower

  # Candidate interior knots: the quartiles of horsepower.
  kandidat <- quantile(hp, probs = c(0.25, 0.5, 0.75))

  # Element i + 1 holds the interior knots of the i-knot model. NULL means
  # no interior knot; bs(hp, df = 4) is deliberately not used for that case
  # because it would place a hidden interior knot at the median.
  knot_sets <- c(
    list(NULL),
    lapply(seq_along(kandidat), function(k) kandidat[seq_len(k)])
  )

  model_list <- lapply(knot_sets, function(knots) {
    lm(mpg ~ bs(hp, knots = knots), data = data_origin)
  })
  AIC_list <- vapply(model_list, AIC, numeric(1))

  idx <- which.min(AIC_list)

  # Plain if/else rather than ifelse(): ifelse() on a scalar test returns a
  # single element and would truncate a multi-knot vector to its first knot.
  knot <- if (idx == 1) NA else kandidat[seq_len(idx - 1)]

  list(model = model_list[[idx]], knot = knot)
}

# ----------------------------
# 2. FIT the model per origin
# ----------------------------
origin_list <- split(df, df$origin)
hasil <- lapply(origin_list, pilih_knot_optimal)

# ----------------------------
# 3. COMBINED PLOT (one line per origin in a single graph)
# ----------------------------
cols <- c("red", "blue", "darkgreen") # one colour per origin
names(cols) <- levels(df$origin)

plot(
  df$horsepower, df$mpg,
  pch = 19, col = cols[as.character(df$origin)],
  main = "Spline Regresi MPG ~ Horsepower per Origin (Knot Optimal)",
  xlab = "Horsepower", ylab = "MPG"
)

# Spline curve per origin.
for (o in levels(df$origin)) {
  dat_o <- origin_list[[o]]
  hp_sorted <- sort(dat_o$horsepower)

  model_o <- hasil[[o]]$model
  # The model's predictor is called `hp`, so newdata must use that name.
  pred_o <- predict(model_o, newdata = data.frame(hp = hp_sorted))

  lines(hp_sorted, pred_o, col = cols[o], lwd = 2)

  # Dashed vertical line at every interior knot of the selected model.
  knot_o <- hasil[[o]]$knot
  if (!anyNA(knot_o)) {
    abline(v = knot_o, col = cols[o], lty = 2)
  }
}

legend(
  "topright",
  legend = paste0("Origin ", levels(df$origin)),
  col = cols, lwd = 2
)