# Load the player data set from CSV.
data_pemain <- read.csv("C:/kuliah/4. anreg/premier-player-23-24.csv")

# Fail early if an expected column is absent: `$` would silently return NULL
# (or partially match a longer column name), and the error would only surface
# later in the analysis with an unrelated message.
stopifnot(all(c("Gls", "Min") %in% names(data_pemain)))

# Extract the analysis variables using exact-name matching.
goals   <- data_pemain[["Gls"]]
minutes <- data_pemain[["Min"]]

# Combine both variables into a two-column data frame.
data_pemain2 <- data.frame(goals, minutes)

# Preview the data: first six rows (the default row count, spelled out).
print(head(data_pemain2, n = 6L))
##   goals minutes
## 1     8    2931
## 2    19    2857
## 3     0    2785
## 4    11    2647
## 5     0    2767
## 6     6    2578
# Preview the data: last six rows.
print(tail(data_pemain2, n = 6L))
##     goals minutes
## 575     0      92
## 576     0      28
## 577     0      21
## 578     0      13
## 579     0      10
## 580     0       8
# Simple descriptive statistics.
# Results are printed explicitly, consistent with the other print() calls in
# this script, so they are still shown when the file is run via source(),
# which does not auto-print top-level values by default.
print(summary(data_pemain2))
##      goals           minutes      
##  Min.   : 0.000   Min.   :   1.0  
##  1st Qu.: 0.000   1st Qu.: 342.8  
##  Median : 1.000   Median :1164.0  
##  Mean   : 2.064   Mean   :1294.6  
##  3rd Qu.: 2.000   3rd Qu.:2104.2  
##  Max.   :27.000   Max.   :3420.0
# Standard deviation of each variable.
print(sd(goals))
## [1] 3.621238
print(sd(minutes))
## [1] 1024.72
# Pearson correlation test between goals and minutes.
# `alternative` and `conf.level` are written out at their default values.
hasil_pearson <- cor.test(
  x = goals,
  y = minutes,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
print(hasil_pearson)
## 
##  Pearson's product-moment correlation
## 
## data:  goals and minutes
## t = 12.576, df = 578, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3970915 0.5251177
## sample estimates:
##       cor 
## 0.4635203
# Scatter plot with goals on the x-axis and minutes on the y-axis.
plot(
  x = goals,
  y = minutes,
  pch = 19,
  col = "blue",
  xlab = "Goals",
  ylab = "Minutes Played",
  main = "Scatter Plot Goals vs Minutes Played"
)
# Overlay the fitted line from the linear model of minutes on goals.
abline(reg = lm(minutes ~ goals), col = "red", lwd = 2)

```