# Make the built-in airquality data frame available in the workspace.
data("airquality")

# (a) Descriptive statistics for the Ozone column:
# min, quartiles, median, mean, max, and the number of missing values.
summary(airquality[["Ozone"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37
# Standard deviation of Ozone, skipping the missing readings.
sd(airquality[["Ozone"]], na.rm = TRUE)
## [1] 32.98788
# (b) Scatter plot with Wind on the x axis and Temp on the y axis,
# drawn as solid (pch 19) blue points.
plot(x = airquality[["Wind"]], y = airquality[["Temp"]],
     pch = 19, col = "blue",
     main = "Scatter Plot: Wind vs Temp",
     xlab = "Wind",
     ylab = "Temperature")

# Make the built-in mtcars data frame available in the workspace.
data("mtcars")

# Bar chart showing how many rows fall into each distinct value of cyl.
cyl_counts <- table(mtcars[["cyl"]])  # frequency of every cyl category
barplot(
  height = cyl_counts,
  col = "lightblue",                  # light blue bars
  main = "Bar Chart of cyl",
  xlab = "Number of Cylinders",
  ylab = "Frequency"
)

# Make the built-in iris data frame available in the workspace.
data("iris")

# (a) Box plot of Petal.Width grouped by Species
boxplot(Petal.Width ~ Species, data = iris,
        main = "Boxplot of Petal.Width by Species",
        xlab = "Species",
        ylab = "Petal Width",
        col = c("red", "green", "blue"))   # one fill colour per species

# (b) Correlation between Sepal.Length and Petal.Length
correlation <- cor(iris$Sepal.Length, iris$Petal.Length)
# paste() already separates its arguments with a single space, so the label
# must not end in a space of its own; otherwise the message shows two spaces
# before the number.
print(paste("Korelasi antara Sepal.Length dan Petal.Length:", correlation))
## [1] "Korelasi antara Sepal.Length dan Petal.Length: 0.871753775886583"
# (c) Scatter plot of Sepal.Length against Sepal.Width, coloured by Species
library(ggplot2)   # ggplot2 provides the plotting functions used below
## Warning: package 'ggplot2' was built under R version 4.4.2
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point(size = 3) +                       # the data points
  geom_smooth(method = "lm", se = FALSE) +     # linear fit, no confidence band
  labs(title = "Scatter Plot: Sepal.Length vs Sepal.Width",
       x = "Sepal Length",
       y = "Sepal Width") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Contingency table of the vs and am columns of mtcars.
table_vs_am <- table(mtcars[["vs"]], mtcars[["am"]])

# Chi-square test on that contingency table.
chisq_result <- chisq.test(x = table_vs_am)
print("Hasil Uji Chi-Square:")
## [1] "Hasil Uji Chi-Square:"
print(chisq_result)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_vs_am
## X-squared = 0.34754, df = 1, p-value = 0.5555
# Drop every row that contains an NA in any column.
# NOTE(review): this also discards rows whose only missing value is in a
# column the model below does not use (e.g. Ozone) - confirm this is intended.
airquality_clean <- na.omit(airquality)

# (a) Fit a simple linear regression of Temp on Solar.R.
model <- lm(formula = Temp ~ Solar.R, data = airquality_clean)
summary_model <- summary(model)

# Print the model summary.
print("Ringkasan Model Regresi Linear:")
## [1] "Ringkasan Model Regresi Linear:"
print(summary_model)
## 
## Call:
## lm(formula = Temp ~ Solar.R, data = airquality_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.735  -6.292   1.080   6.231  18.648 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 72.110720   1.970502  36.595  < 2e-16 ***
## Solar.R      0.030747   0.009571   3.212  0.00173 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.15 on 109 degrees of freedom
## Multiple R-squared:  0.08649,    Adjusted R-squared:  0.07811 
## F-statistic: 10.32 on 1 and 109 DF,  p-value: 0.001731
# (b) Scatter plot of Temp against Solar.R, with the fitted line drawn on top.
plot(x = airquality_clean[["Solar.R"]], y = airquality_clean[["Temp"]],
     pch = 19, col = "blue",  # solid blue data points
     main = "Scatter Plot: Solar.R vs Temp dengan Garis Regresi",
     xlab = "Solar Radiation",
     ylab = "Temperature")

# Overlay the regression line of the fitted model in red, at double width.
abline(reg = model, col = "red", lwd = 2)

# Interpretation of the regression results
cat("\nInterpretasi Hasil Model:\n")
## 
## Interpretasi Hasil Model:
# Regression coefficients: in the coefficient matrix, row 1 is the intercept,
# row 2 is the Solar.R slope, and column 1 holds the estimates.
cat("Koefisien Intercept (α):", summary_model[["coefficients"]][1, 1], "\n")
## Koefisien Intercept (α): 72.11072
cat("Koefisien Solar.R (β):", summary_model[["coefficients"]][2, 1], "\n")
## Koefisien Solar.R (β): 0.03074685
# R-squared value
cat("Nilai R-squared (R^2):", summary_model[["r.squared"]], "\n")
## Nilai R-squared (R^2): 0.08648754
# Rate the model by R-squared: anything not above 0.7 is reported as
# weak-or-moderate, otherwise as good.
if (summary_model[["r.squared"]] <= 0.7) {
  cat("Model memiliki kemampuan prediksi yang lemah atau sedang karena R^2 < 0.7.\n")
} else {
  cat("Model memiliki kemampuan prediksi yang baik karena R^2 > 0.7.\n")
}
## Model memiliki kemampuan prediksi yang lemah atau sedang karena R^2 < 0.7.