UAS PRAKTEK KOMPUTASI STATISTIKA

Nama    : Ronaldo Situmorang
NIM     : 2304220026
Prodi   : Statistika dan Sains Data

IMPORT DATASET AIRQUALITY

# Keep a copy of the built-in airquality data frame under the name `dataset`.
dataset <- airquality

MENGHITUNG STATISTIK DESKRIPTIF (mean, median, standar deviasi)

# Mean, median and standard deviation of the Ozone column.
# na.rm = TRUE drops NA values before each statistic is computed.
mean_Ozone <- with(airquality, mean(Ozone, na.rm = TRUE))
median_Ozone <- with(airquality, median(Ozone, na.rm = TRUE))
sd_Ozone <- with(airquality, sd(Ozone, na.rm = TRUE))

# Report each statistic on its own line.
cat("Mean Ozone:", mean_Ozone, "\n")
## Mean Ozone: 42.12931
cat("Median Ozone:", median_Ozone, "\n")
## Median Ozone: 31.5
cat("Standar Deviasi Ozone:", sd_Ozone, "\n")
## Standar Deviasi Ozone: 32.98788

DIAGRAM PENCAR

# Scatter plot of Temp (vertical axis) against Wind (horizontal axis).
plot(
  x = airquality[["Wind"]],
  y = airquality[["Temp"]],
  main = "Scatter Plot antara Wind dan Temp",
  xlab = "Wind",
  ylab = "Temp"
)

Load dataset mtcars

# Load the built-in mtcars data set into the workspace.
data(list = "mtcars")

Buat bar chart untuk variabel cyl

# Bar chart of the number of cars for each value of cyl.
barplot(
  height = table(mtcars[["cyl"]]),
  main = "Bar Chart untuk Variabel cyl",
  xlab = "Jumlah Silinder (cyl)",
  ylab = "Jumlah Mobil",
  col = "lightblue"
)

### Load dataset iris

# Load the built-in iris data set into the workspace.
data(list = "iris")

Buat boxplot

# Box plot of Petal.Width, one box per level of Species.
boxplot(
  Petal.Width ~ Species,
  data = iris,
  col = "lightgreen",
  xlab = "Species",
  ylab = "Petal Width",
  main = "Boxplot Petal.Width berdasarkan Species"
)

MENGHITUNG KORELASI

# Correlation between sepal length and petal length, using cor()'s default
# method (Pearson).
correlation <- cor(iris$Sepal.Length, iris$Petal.Length)
cat("Korelasi antara Sepal.Length dan Petal.Length:", correlation, "\n")
## Korelasi antara Sepal.Length dan Petal.Length: 0.8717538
# A stray `print(summary)` used to follow here; it printed the source of the
# `summary` generic rather than any result, so it has been removed.

Keterangan: Nilai positif menunjukkan hubungan linear positif antara dua variabel.

Scatter plot Sepal.Length vs Sepal.Width

library(ggplot2)
# Sepal.Width against Sepal.Length, coloured by Species. Because colour is
# mapped in aes(), geom_smooth() fits a separate straight line per species;
# se = FALSE hides the confidence bands.
ggplot(
  data = iris,
  mapping = aes(Sepal.Length, Sepal.Width, colour = Species)
) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  theme_minimal() +
  labs(
    x = "Sepal Length",
    y = "Sepal Width",
    title = "Scatter Plot Sepal.Length vs Sepal.Width"
  )
## `geom_smooth()` using formula = 'y ~ x'

Uji Chi-Square untuk vs dan am

# Chi-squared test of independence on the vs-by-am contingency table.
# correct = TRUE is chisq.test()'s default (Yates' continuity correction),
# spelled out here. The table expression is kept as-is so the `data:` line of
# the printed result stays the same.
chi_test <- chisq.test(
  x = table(mtcars$vs, mtcars$am),
  correct = TRUE
)

Cetak hasil uji

# Display the chi-squared result: test name, data expression, statistic,
# degrees of freedom and p-value (recorded output follows).
print(chi_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(mtcars$vs, mtcars$am)
## X-squared = 0.34754, df = 1, p-value = 0.5555

Bangun model regresi linear sederhana

# Simple linear regression with Temp as the response and Solar.R as the
# single predictor.
model <- lm(
  formula = Temp ~ Solar.R,
  data = airquality
)

Ringkasan model

# Print the fitted model's summary: residual quantiles, coefficient table,
# residual standard error, R-squared values and the overall F-test.
# The recorded output notes that 7 observations were dropped for missingness.
summary(model)
## 
## Call:
## lm(formula = Temp ~ Solar.R, data = airquality)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.3787  -4.9572   0.8932   5.9111  18.4013 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 72.863012   1.693951  43.014  < 2e-16 ***
## Solar.R      0.028255   0.008205   3.444 0.000752 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.898 on 144 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.07609,    Adjusted R-squared:  0.06967 
## F-statistic: 11.86 on 1 and 144 DF,  p-value: 0.0007518

Scatter plot dengan garis regresi

# Base-graphics scatter plot of Temp against Solar.R, drawn with solid blue
# points (pch = 16).
plot(airquality$Solar.R, airquality$Temp,
     main = "Scatter Plot Temp vs Solar.R",
     xlab = "Solar.R", ylab = "Temp",
     pch = 16, col = "blue")
# A stray `print(summary)` used to follow here; it printed the source of the
# `summary` generic rather than any result, so it has been removed.

Tambahkan garis regresi

# Build the Temp-vs-Solar.R scatter plot in two steps: the base plot with its
# aesthetic mapping, then the point layer and a linear-model smoother.
plotregresi <- ggplot(data = airquality, mapping = aes(Solar.R, Temp))
plotregresi <- plotregresi +
  geom_point() +
  geom_smooth(method = "lm")
# Draw the plot. The recorded warnings below report that 7 rows were dropped.
print(plotregresi)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

Scatter plot Sepal.Length vs Sepal.Width (dataset iris) dengan garis regresi menunjukkan hubungan antara Sepal.Length dan Sepal.Width. Warna berbeda berdasarkan spesies membantu memahami perbedaan pola hubungan antar kelompok spesies. Garis regresi untuk tiap spesies memberikan gambaran tren hubungan linear antara kedua variabel tersebut.

Berdasarkan hasil uji model regresi, variabel independen (Solar.R) berpengaruh signifikan terhadap variabel dependen (Temp). Hal ini dapat dilihat dari nilai p-value yang sangat rendah (0.00075 < 0.05) sehingga kita menolak H0 dan menerima H1.

Berikut adalah bentuk modelnya. Model regresi: Temp = 72.8630 + 0.028255 × Solar.R. Adjusted R-squared: 0.06967, atau sekitar 7.0% (sangat lemah).