# MUHAMMAD FARID
# library ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
# 1. Descriptive statistics for the airquality data
# Load the built-in dataset.
data("airquality")
# a. Mean, median, and standard deviation of Ozone.
# Ozone contains missing values, so every summary has to drop them with
# na.rm = TRUE; without it mean() and median() return NA.
mean_ozone <- mean(airquality$Ozone, na.rm = TRUE)
median_ozone <- median(airquality$Ozone, na.rm = TRUE)
sd_ozone <- sd(airquality$Ozone, na.rm = TRUE)
# Mean
mean_ozone
## [1] 42.12931
# Median
median_ozone
## [1] 31.5
# Standard deviation
sd_ozone
## [1] 32.98788
# b. Scatter plot of Temp (y axis) against Wind (x axis), drawn as solid
# blue points.
plot(
  x = airquality[["Wind"]],
  y = airquality[["Temp"]],
  col = "blue",
  pch = 19,
  xlab = "Wind",
  ylab = "Temp",
  main = "Scatter Plot"
)

# 2. mtcars dataset
data("mtcars")
# Tabulate how many rows fall into each value of cyl.
cyl_counts <- table(mtcars[["cyl"]])
# Bar chart of those counts, one colour per cyl category.
barplot(
  cyl_counts,
  col = c("skyblue", "yellow", "red"),
  xlab = "cyl",
  ylab = "Jumlah",
  main = "Bar Chart Jumlah Tiap Kategori cyl"
)

# 3a. iris dataset
data("iris")
# Box plot of Petal.Width, one box per Species, each with its own colour.
boxplot(
  Petal.Width ~ Species,
  data = iris,
  col = c("skyblue", "yellow", "red"),
  xlab = "Species",
  ylab = "Petal.Width",
  main = "Boxplot Petal.Width Berdasarkan Species"
)

# 3b. Correlation between Sepal.Length and Petal.Length (cor() default
# method), evaluated inside the iris data frame.
hasil <- with(iris, cor(Sepal.Length, Petal.Length))
# Show the coefficient
hasil
## [1] 0.8717538
# 3c. Scatter plot of Sepal.Width against Sepal.Length, coloured by Species,
# with a linear-model smoother (method = "lm") and no confidence band.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Scatter Plot Sepal.Length vs Sepal.Width") +
  xlab("Sepal.Length") +
  ylab("Sepal.Width") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# 4. Chi-square test between the vs and am columns of mtcars.
# As the recorded output shows, Yates' continuity correction is applied.
hasil_chi_square <- chisq.test(
  x = mtcars$vs,
  y = mtcars$am
)
# Show the test result
hasil_chi_square
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mtcars$vs and mtcars$am
## X-squared = 0.34754, df = 1, p-value = 0.5555
# 5a. Simple linear regression of Temp on Solar.R (airquality).
# Rows with a missing value are dropped by lm(); the summary below reports
# 7 such observations.
model <- lm(
  formula = Temp ~ Solar.R,
  data = airquality
)
# Model summary
summary(model)
##
## Call:
## lm(formula = Temp ~ Solar.R, data = airquality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.3787 -4.9572 0.8932 5.9111 18.4013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.863012 1.693951 43.014 < 2e-16 ***
## Solar.R 0.028255 0.008205 3.444 0.000752 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.898 on 144 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.07609, Adjusted R-squared: 0.06967
## F-statistic: 11.86 on 1 and 144 DF, p-value: 0.0007518
# Scatter plot of Temp against Solar.R with the fitted regression line
plot(
  x = airquality[["Solar.R"]],
  y = airquality[["Temp"]],
  col = "blue",
  pch = 19,
  xlab = "Solar.R",
  ylab = "Temp",
  main = "Scatter Plot Temp vs Solar.R"
)
# Overlay the line fitted in `model` as a thick red line.
abline(reg = model, lwd = 2, col = "red")

# 5c. Interpretasi
## Signifikansi Koefisien Regresi
### P-value untuk Intercept (< 2e-16) dan Solar.R (0.000752) keduanya jauh lebih kecil dari 0.05.
### P-value untuk Solar.R lebih kecil dari 0.05, sehingga koefisien Solar.R signifikan secara statistik. Dengan demikian, Solar.R berpengaruh signifikan terhadap Temp dalam model ini.
### T-value sebesar 3.444 melebihi nilai kritis t (sekitar 1.98 untuk df = 144 pada alpha = 0.05), yang menunjukkan bahwa efek Solar.R secara signifikan berbeda dari nol.
## R-Square
### Multiple R-squared = 0.07609 = 7.6% menunjukkan bahwa Solar.R hanya menjelaskan sebagian kecil dari variasi dalam Temp.
### Adjusted R-squared = 0.06967 (sekitar 7.0%) juga bernilai rendah, sehingga memperkuat indikasi bahwa model hanya memiliki kemampuan prediksi yang terbatas.