1. Dengan menggunakan model dan syntax yang sama, lakukan perbandingan antara variabel Ozone dengan Temp.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(splines2)
# Keep a working copy of the built-in airquality data set
df_airquality <- datasets::airquality
# Preview the first six rows to check the column layout
head(x = df_airquality, n = 6L)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Make the built-in airquality data set available in the workspace
data(list = "airquality")
# Inspect every column; the NA counts reported here are why rows with
# missing values are removed before the models are fitted
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Drop every row that has a missing value in any column. Note that this also
# discards rows whose only NA is in Solar.R, which this model does not use.
aq <- na.omit(airquality)

# Simple linear regression: Ozone explained by Temp
model_ozone_temp <- lm(formula = Ozone ~ Temp, data = aq)

# Coefficient table, residual summary and goodness-of-fit statistics
summary(model_ozone_temp)
## 
## Call:
## lm(formula = Ozone ~ Temp, data = aq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.922 -17.459  -0.874  10.444 118.078 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -147.6461    18.7553  -7.872 2.76e-12 ***
## Temp           2.4391     0.2393  10.192  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.92 on 109 degrees of freedom
## Multiple R-squared:  0.488,  Adjusted R-squared:  0.4833 
## F-statistic: 103.9 on 1 and 109 DF,  p-value: < 2.2e-16
# Scatterplot of the observations with the fitted regression line overlaid
plot(aq$Temp, aq$Ozone,
     main = "Regresi Linier: Ozone vs Temp",
     xlab = "Temperature (F)",
     ylab = "Ozone Concentration")
abline(model_ozone_temp, col = "red", lwd = 2)

# Model diagnostics (optional): draw the lm diagnostic plots in a 2 x 2 grid.
# par() returns the settings it replaces, so they are saved here and restored
# afterwards; this puts the device back in whatever layout was active before
# instead of assuming it was a single panel.
old_par <- par(mfrow = c(2, 2))
plot(model_ozone_temp)

par(old_par)

2. Ulangi analisis untuk hubungan antara Ozone dengan Wind.

# Keep only the rows without a missing value in any column
aq <- na.omit(airquality)
# Simple linear regression: Ozone explained by Wind
model_ozone_wind <- lm(formula = Ozone ~ Wind, data = aq)
# Coefficient table, residual summary and goodness-of-fit statistics
summary(model_ozone_wind)
## 
## Call:
## lm(formula = Ozone ~ Wind, data = aq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.513 -18.597  -5.035  15.814  88.437 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  99.0413     7.4724   13.25  < 2e-16 ***
## Wind         -5.7288     0.7082   -8.09 9.09e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.42 on 109 degrees of freedom
## Multiple R-squared:  0.3752, Adjusted R-squared:  0.3694 
## F-statistic: 65.44 on 1 and 109 DF,  p-value: 9.089e-13
# Scatterplot of the observations with the fitted regression line overlaid
plot(aq$Wind, aq$Ozone,
     main = "Regresi Linier: Ozone vs Wind",
     xlab = "Wind (mph)",
     ylab = "Ozone Concentration")
abline(model_ozone_wind, col = "red", lwd = 2)

# Draw the lm diagnostic plots in a 2 x 2 grid. par() returns the settings it
# replaces, so they are saved here and restored afterwards rather than
# resetting the layout to a hard-coded single panel.
old_par <- par(mfrow = c(2, 2))
plot(model_ozone_wind)

par(old_par)

3. Bandingkan hasil dari masing-masing model berdasarkan nilai MSE, AIC, dan Adjusted R-squared.

# Refit both simple regressions on aq (the data with NA rows removed)
model_ozone_temp <- lm(formula = Ozone ~ Temp, data = aq)
model_ozone_wind <- lm(formula = Ozone ~ Wind, data = aq)

# In-sample mean squared error: the average of the squared residuals
in_sample_mse <- function(fit) {
  mean(resid(fit)^2)
}

# Adjusted R-squared taken from the model summary
adjusted_r2 <- function(fit) {
  summary(fit)[["adj.r.squared"]]
}

# One value per metric and per model
mse_temp <- in_sample_mse(model_ozone_temp)
mse_wind <- in_sample_mse(model_ozone_wind)
aic_temp <- AIC(model_ozone_temp)
aic_wind <- AIC(model_ozone_wind)
adjr2_temp <- adjusted_r2(model_ozone_temp)
adjr2_wind <- adjusted_r2(model_ozone_wind)

# Side-by-side comparison table, one row per model
comparison <- data.frame(
  Model = c("Ozone ~ Temp", "Ozone ~ Wind"),
  MSE = c(mse_temp, mse_wind),
  AIC = c(aic_temp, aic_wind),
  Adj_R2 = c(adjr2_temp, adjr2_wind)
)

print(comparison)
##          Model      MSE      AIC    Adj_R2
## 1 Ozone ~ Temp 561.8688 1023.775 0.4832625
## 2 Ozone ~ Wind 685.6547 1045.876 0.3694195

4. Berikan insight dan pendapatmu.

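Dari analisis yang sudah dijalankan, terlihat bahwa Temp adalah variabel prediktor yang lebih baik dibanding Wind untuk memodelkan Ozone: model Ozone ~ Temp memiliki MSE yang lebih rendah (561.87 vs 685.65), AIC yang lebih rendah (1023.78 vs 1045.88), dan Adjusted R-squared yang lebih tinggi (0.483 vs 0.369).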

Namun, model ini masih sangat sederhana karena hanya memakai regresi linier dengan satu prediktor. Pada kenyataannya, Ozone dipengaruhi oleh banyak faktor (kelembapan, radiasi matahari, kondisi atmosfer).

Untuk analisis lanjutan, bisa dilakukan dengan:

- Membuat regresi berganda: Ozone ~ Temp + Wind + Solar.R.
- Membandingkan model sederhana vs berganda dengan AIC, R², dsb.
- Mengecek asumsi regresi (normalitas residual, homoskedastisitas, dll).