library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(splines2)
# Keep a working copy of the built-in airquality data set and preview
# its first six rows.
df_airquality <- datasets::airquality
head(df_airquality, n = 6L)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Make the built-in airquality data set available in the workspace.
data(airquality)
# Inspect each column's distribution and NA count; the rows containing
# NA are dropped in a later step.
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Complete-case data: na.omit() removes every row that has an NA in any
# column, so rows missing only Solar.R are dropped as well.
aq <- na.omit(airquality)

# Simple linear regression of ozone on temperature.
model_ozone_temp <- lm(formula = Ozone ~ Temp, data = aq)

# Coefficient table, residual spread and goodness-of-fit statistics.
summary(model_ozone_temp)
##
## Call:
## lm(formula = Ozone ~ Temp, data = aq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.922 -17.459 -0.874 10.444 118.078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -147.6461 18.7553 -7.872 2.76e-12 ***
## Temp 2.4391 0.2393 10.192 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.92 on 109 degrees of freedom
## Multiple R-squared: 0.488, Adjusted R-squared: 0.4833
## F-statistic: 103.9 on 1 and 109 DF, p-value: < 2.2e-16
# Scatterplot of ozone against temperature with the fitted regression
# line drawn on top.
with(
  aq,
  plot(
    Temp, Ozone,
    main = "Regresi Linier: Ozone vs Temp",
    xlab = "Temperature (F)",
    ylab = "Ozone Concentration"
  )
)
abline(reg = model_ozone_temp, lwd = 2, col = "red")
# Model diagnostics (optional): draw the standard lm diagnostic plots in
# a 2 x 2 grid. par() returns the previous value of the settings it
# changes, so restoring from that value puts back whatever layout was
# active before instead of assuming it was 1 x 1.
old_par <- par(mfrow = c(2, 2))
plot(model_ozone_temp)
par(old_par)
# Rebuild the complete-case data so this step can run on its own.
aq <- na.omit(airquality)

# Simple linear regression of ozone on wind speed.
model_ozone_wind <- lm(formula = Ozone ~ Wind, data = aq)

# Coefficient table, residual spread and goodness-of-fit statistics.
summary(model_ozone_wind)
##
## Call:
## lm(formula = Ozone ~ Wind, data = aq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.513 -18.597 -5.035 15.814 88.437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 99.0413 7.4724 13.25 < 2e-16 ***
## Wind -5.7288 0.7082 -8.09 9.09e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.42 on 109 degrees of freedom
## Multiple R-squared: 0.3752, Adjusted R-squared: 0.3694
## F-statistic: 65.44 on 1 and 109 DF, p-value: 9.089e-13
# Scatterplot of ozone against wind speed with the fitted regression
# line drawn on top.
with(
  aq,
  plot(
    Wind, Ozone,
    main = "Regresi Linier: Ozone vs Wind",
    xlab = "Wind (mph)",
    ylab = "Ozone Concentration"
  )
)
abline(reg = model_ozone_wind, lwd = 2, col = "red")
# Model diagnostics: draw the standard lm diagnostic plots in a 2 x 2
# grid. par() returns the previous value of the settings it changes, so
# restoring from that value puts back whatever layout was active before
# instead of assuming it was 1 x 1.
old_par <- par(mfrow = c(2, 2))
plot(model_ozone_wind)
par(old_par)
# Regression models: fit both single-predictor models on the same data
# (`aq`) so that their fit statistics are directly comparable.
model_ozone_temp <- lm(formula = Ozone ~ Temp, data = aq)
model_ozone_wind <- lm(formula = Ozone ~ Wind, data = aq)

# Temperature model: in-sample mean squared error (mean of the squared
# residuals), AIC and adjusted R-squared.
mse_temp <- mean(resid(model_ozone_temp)^2)
aic_temp <- AIC(model_ozone_temp)
adjr2_temp <- summary(model_ozone_temp)[["adj.r.squared"]]

# Wind model: the same three statistics.
mse_wind <- mean(resid(model_ozone_wind)^2)
aic_wind <- AIC(model_ozone_wind)
adjr2_wind <- summary(model_ozone_wind)[["adj.r.squared"]]

# Comparison table with one row per model.
comparison <- data.frame(
  Model = c("Ozone ~ Temp", "Ozone ~ Wind"),
  MSE = c(mse_temp, mse_wind),
  AIC = c(aic_temp, aic_wind),
  Adj_R2 = c(adjr2_temp, adjr2_wind)
)
print(comparison)
## Model MSE AIC Adj_R2
## 1 Ozone ~ Temp 561.8688 1023.775 0.4832625
## 2 Ozone ~ Wind 685.6547 1045.876 0.3694195
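Dari hasil analisis di atas, terlihat bahwa Temp adalah variabel prediktor yang lebih baik dibanding Wind untuk memodelkan Ozone: model Ozone ~ Temp memiliki MSE lebih kecil (561,87 vs. 685,65), AIC lebih rendah (1023,78 vs. 1045,88), dan Adjusted R² lebih tinggi (0,483 vs. 0,369).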
Namun, kedua model ini masih sangat sederhana karena masing-masing hanya memakai satu prediktor (regresi linier sederhana). Pada kenyataannya, Ozone dipengaruhi oleh banyak faktor (kelembapan, radiasi matahari, kondisi atmosfer).
Untuk analisis lanjutan, beberapa langkah yang bisa dilakukan:

- Membuat regresi berganda: `Ozone ~ Temp + Wind + Solar.R`.
- Membandingkan model sederhana vs. berganda dengan AIC, R², dsb.
- Mengecek asumsi regresi (normalitas residual, homoskedastisitas, dll.).