# Per-column summary of the cars data: minimum, quartiles, median, mean and
# maximum for both speed and dist (see the recorded output below).
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
# Load the mtcars dataset into the workspace
data(mtcars)

# a. Descriptive statistics for mpg: mean, median and standard deviation
mean_mpg <- with(mtcars, mean(mpg))
median_mpg <- with(mtcars, median(mpg))
sd_mpg <- with(mtcars, sd(mpg))

# Print the three statistics on a single labelled line
cat(
  "Mean:", mean_mpg,
  "Median:", median_mpg,
  "Standard Deviation:", sd_mpg,
  "\n"
)
## Mean: 20.09062 Median: 19.2 Standard Deviation: 6.026948
# b. Boxplot of mpg grouped by cyl, with one fill colour per box
boxplot(
  mpg ~ cyl,
  data = mtcars,
  col = c("lightblue", "lightgreen", "pink"),
  main = "Boxplot of MPG by Number of Cylinders",
  xlab = "Number of Cylinders",
  ylab = "Miles per Gallon (MPG)"
)

library(e1071)
## Warning: package 'e1071' was built under R version 4.4.2
# Histogram of hp on the density scale (probability = TRUE) so that the
# kernel density curve drawn afterwards shares the same y axis
hist(
  mtcars[["hp"]],
  breaks = 10,
  probability = TRUE,
  col = "lightgray",
  border = "white",
  main = "Histogram of Horsepower (hp)",
  xlab = "Horsepower"
)
lines(density(mtcars[["hp"]]), lwd = 2, col = "blue")

# Compute the skewness of hp (skewness() is provided by e1071)
skewness_hp <- skewness(mtcars[["hp"]])
cat("Skewness:", skewness_hp, "\n")
## Skewness: 0.7260237
# Interpret the distribution shape from the sign of the skewness; the
# symmetric message is reached only when the value is exactly zero
cat(
  if (skewness_hp > 0) {
    "Distribusi horsepower adalah positively skewed (kanan).\n"
  } else if (skewness_hp < 0) {
    "Distribusi horsepower adalah negatively skewed (kiri).\n"
  } else {
    "Distribusi horsepower adalah simetris.\n"
  }
)
## Distribusi horsepower adalah positively skewed (kanan).
# Load the iris dataset into the workspace
data(iris)

# One-way ANOVA of Sepal.Length across species. The summary is computed once
# and reused both for printing and for the significance check below.
anova_result <- aov(Sepal.Length ~ Species, data = iris)
anova_summary <- summary(anova_result)
anova_summary
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Species       2  63.21  31.606   119.3 <2e-16 ***
## Residuals   147  38.96   0.265                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# If the Species effect is significant, run a post-hoc test (Tukey HSD).
# summary.aov() pads the row names of its table with trailing blanks
# ("Species    "), so the row is located by its trimmed name instead of
# relying on partial row-name matching in `[.data.frame`.
anova_table <- anova_summary[[1]]
species_row <- trimws(rownames(anova_table)) == "Species"
species_p_value <- anova_table[species_row, "Pr(>F)"]
# isTRUE() keeps the condition a strict scalar TRUE/FALSE even when the
# p-value is missing (NA) or the row could not be found (length zero).
if (isTRUE(species_p_value < 0.05)) {
  tukey_result <- TukeyHSD(anova_result)
  print(tukey_result)
}
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Sepal.Length ~ Species, data = iris)
## 
## $Species
##                       diff       lwr       upr p adj
## versicolor-setosa    0.930 0.6862273 1.1737727     0
## virginica-setosa     1.582 1.3382273 1.8257727     0
## virginica-versicolor 0.652 0.4082273 0.8957727     0
# Petal.Length values for the setosa and versicolor species
setosa <- iris$Petal.Length[iris$Species == "setosa"]
versicolor <- iris$Petal.Length[iris$Species == "versicolor"]

# Two-sample t-test with pooled variance (var.equal = TRUE)
t_test_result <- t.test(setosa, versicolor, var.equal = TRUE)
print(t_test_result)
## 
##  Two Sample t-test
## 
## data:  setosa and versicolor
## t = -39.493, df = 98, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.938597 -2.657403
## sample estimates:
## mean of x mean of y 
##     1.462     4.260
# Conclusion: report the p-value, then compare it against the 0.05 level
cat("P-value:", t_test_result[["p.value"]], "\n")
## P-value: 5.404911e-62
cat(
  if (t_test_result[["p.value"]] < 0.05) {
    "Ada perbedaan signifikan dalam panjang petal antara setosa dan versicolor.\n"
  } else {
    "Tidak ada perbedaan signifikan dalam panjang petal antara setosa dan versicolor.\n"
  }
)
## Ada perbedaan signifikan dalam panjang petal antara setosa dan versicolor.
# a. Fit a simple linear regression of mpg on wt and show its summary
model <- lm(mpg ~ wt, data = mtcars)
summary(model)
## 
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5432 -2.3647 -0.1252  1.4096  6.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
## wt           -5.3445     0.5591  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
# b. Scatter plot of mpg against wt with the fitted regression line on top
with(
  mtcars,
  plot(
    wt, mpg,
    pch = 19,
    col = "darkgray",
    main = "Scatter Plot of MPG vs Weight",
    xlab = "Weight (1000 lbs)",
    ylab = "Miles per Gallon (MPG)"
  )
)
abline(model, lwd = 2, col = "blue")

# c. Report the fitted intercept, the wt slope and the model R-squared
cat("Koefisien regresi (Intercept):", coef(model)[["(Intercept)"]], "\n")
## Koefisien regresi (Intercept): 37.28513
cat("Koefisien regresi (wt):", coef(model)[["wt"]], "\n")
## Koefisien regresi (wt): -5.344472
cat("Nilai R²:", summary(model)[["r.squared"]], "\n")
## Nilai R²: 0.7528328