# Answer to question 1
# Descriptive statistics for the Ozone column of the built-in `airquality`
# data set. na.rm = TRUE drops missing readings before each summary.
data(airquality)
mean_ozone <- with(airquality, mean(Ozone, na.rm = TRUE))
median_ozone <- with(airquality, median(Ozone, na.rm = TRUE))
sd_ozone <- with(airquality, sd(Ozone, na.rm = TRUE))

# Report every statistic rounded to two decimal places.
print(paste("Mean Ozone:", round(mean_ozone, digits = 2)))
## [1] "Mean Ozone: 42.13"
print(paste("Median Ozone:", round(median_ozone, digits = 2)))
## [1] "Median Ozone: 31.5"
print(paste("Standard Deviation Ozone:", round(sd_ozone, digits = 2)))
## [1] "Standard Deviation Ozone: 32.99"
# Scatter plot of temperature against wind speed, drawn with solid blue points.
with(
  airquality,
  plot(
    x = Wind,
    y = Temp,
    main = "Scatter Plot of Wind vs Temperature",
    xlab = "Wind (mph)",
    ylab = "Temperature (°F)",
    col = "blue",
    pch = 19
  )
)

# Answer to question 2
# Bar chart showing how many cars in `mtcars` have each cylinder count,
# with the count written above every bar.
data(mtcars)
cyl_counts <- table(mtcars$cyl) # frequency of each distinct cyl value

# barplot() returns the x coordinate of each bar's midpoint. Bars are not
# centred on 1, 2, 3, ..., so the midpoints are kept for positioning labels.
# ylim adds headroom so the label above the tallest bar is not clipped.
bar_mids <- barplot(
  cyl_counts,
  main = "Bar Chart of Cylinder Counts",
  xlab = "Number of Cylinders",
  ylab = "Count",
  col = "lightblue",
  ylim = c(0, max(cyl_counts) * 1.15)
)

# Write each count just above (pos = 3) the top of its own bar.
text(
  x = bar_mids,
  y = as.vector(cyl_counts),
  labels = as.vector(cyl_counts),
  pos = 3, cex = 0.8, col = "red"
)

# Answer to question 3
# Box plot of petal width for each iris species, one fill colour per species.
data(iris)
species_fill <- c("lightgreen", "lightblue", "lightpink")
boxplot(
  Petal.Width ~ Species,
  data = iris,
  col = species_fill,
  main = "Boxplot of Petal.Width by Species",
  xlab = "Species",
  ylab = "Petal Width (cm)"
)

# Correlation between sepal length and petal length (cor() default method).
correlation <- with(iris, cor(Sepal.Length, Petal.Length))
print(paste(
  "Korelasi antara Sepal.Length dan Petal.Length:",
  round(correlation, digits = 3)
))
## [1] "Korelasi antara Sepal.Length dan Petal.Length: 0.872"
# Scatter plot of sepal length vs sepal width, coloured by species, with a
# straight-line fit (method = "lm") and no confidence band (se = FALSE).
# NOTE(review): the correlation computed just before this plot uses
# Petal.Length, while this plot shows Sepal.Width - confirm which pair of
# variables the exercise intends.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Scatter Plot of Sepal.Length vs Sepal.Width") +
  xlab("Sepal Length (cm)") +
  ylab("Sepal Width (cm)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Answer to question 4
# Cross-tabulate the `vs` and `am` columns of mtcars (rows = vs, columns = am).
table_vs_am <- table(mtcars[["vs"]], mtcars[["am"]])

# Chi-square test on the 2 x 2 table; per the printed output below, R applies
# Yates' continuity correction here.
chi_test <- chisq.test(x = table_vs_am)

# Show the test statistic, degrees of freedom and p-value.
print(chi_test)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table_vs_am
## X-squared = 0.34754, df = 1, p-value = 0.5555
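# Decision rule: if p-value < 0.05, reject H_0 (there is a significant association between vs and am).
# If p-value >= 0.05, fail to reject H_0 (no significant association between vs and am).
# Here p-value = 0.5555 > 0.05, so H_0 is not rejected: there is no significant association between vs and am.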
# Answer to question 5
# Simple linear regression of temperature on solar radiation.
model <- lm(formula = Temp ~ Solar.R, data = airquality)

# Print the model summary: coefficients, residual spread and fit statistics.
# The output below reports 7 observations dropped because of missing values.
summary(model)
##
## Call:
## lm(formula = Temp ~ Solar.R, data = airquality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.3787 -4.9572 0.8932 5.9111 18.4013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.863012 1.693951 43.014 < 2e-16 ***
## Solar.R 0.028255 0.008205 3.444 0.000752 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.898 on 144 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.07609, Adjusted R-squared: 0.06967
## F-statistic: 11.86 on 1 and 144 DF, p-value: 0.0007518

# Scatter plot of the raw data (solid blue points) ...
with(
  airquality,
  plot(
    x = Solar.R,
    y = Temp,
    main = "Scatter Plot with Regression Line",
    xlab = "Solar Radiation (Solar.R)",
    ylab = "Temperature (°F)",
    col = "blue",
    pch = 19
  )
)
# ... overlaid with the fitted regression line in red.
abline(reg = model, col = "red", lwd = 2)

#Intercept (72.86): Jika nilai Solar.R (intensitas radiasi matahari) adalah 0, maka suhu (Temp) diperkirakan 72.86.
#Solar.R Coefficient (0.028): Setiap peningkatan 1 unit pada Solar.R akan meningkatkan suhu (Temp) sebesar 0.028 unit.
#Nilai p-value untuk koefisien Solar.R adalah 0.000752, yang lebih kecil dari 0.05, sehingga koefisien ini signifikan. Artinya, Solar.R berpengaruh signifikan terhadap suhu.
#Meskipun Solar.R berpengaruh signifikan terhadap suhu, kemampuan prediksi model ini masih rendah (R-squared hanya 7.6%). Untuk meningkatkan akurasi, mungkin perlu menambahkan variabel lain ke dalam model.