# Load necessary libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.2.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.2.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Warning: package 'car' was built under R version 4.2.3
## Loading required package: carData
# Import the raw worksheet holding energy production (Y) and consumption (X).
# The location is a hard-coded absolute path on drive D:.
DATA <- read_excel(
  path = "D:/metode statistika/Data Energy Production (Y) dan Consumption (X).xlsx"
)
# Predictor variable X: energy consumption.
# The column is copied in one vectorized step instead of being grown one
# element at a time with c() inside a loop (which re-copies the whole vector
# on every iteration). as.vector() drops attributes, as the loop did.
Energy_Consumption <- as.vector(DATA$Energy_Consumption)
# Values use a decimal comma (e.g. "12,5"); swap it for a decimal point so
# as.numeric() can parse them. fixed = TRUE matches the comma literally.
Energy_Consumption_clear <- gsub(",", ".", Energy_Consumption, fixed = TRUE)
# Entries that still cannot be parsed become NA (with as.numeric()'s warning).
data_Energy_Consumption <- as.numeric(Energy_Consumption_clear)


# Response variable Y: energy production.
# The column is copied in one vectorized step instead of being grown one
# element at a time with c() inside a loop (which re-copies the whole vector
# on every iteration). as.vector() drops attributes, as the loop did.
Energy_Production <- as.vector(DATA$Energy_Production)
# Values use a decimal comma (e.g. "12,5"); swap it for a decimal point so
# as.numeric() can parse them. fixed = TRUE matches the comma literally.
Energy_Production_clear <- gsub(",", ".", Energy_Production, fixed = TRUE)
# Entries that still cannot be parsed become NA (with as.numeric()'s warning).
data_Energy_Production <- as.numeric(Energy_Production_clear)
# Relationship between X and Y: scatter plot of energy production against
# energy consumption.
plot(
  x = data_Energy_Consumption,
  y = data_Energy_Production,
  main = "Plot Sebaran Energy Consumption dengan Energy Production",
  xlab = "Energy Consumption",
  ylab = "Energy Production",
  col = "black",
  pch = 1,     # plotting symbol used for each point
  type = "p"   # "p" = draw points only
)

# Put both cleaned series side by side: X = consumption, Y = production.
data <- data.frame(X = data_Energy_Consumption, Y = data_Energy_Production)

# Analysis of variance for Y explained by X, followed by its summary table.
anova_result <- aov(formula = Y ~ X, data = data)
summary(anova_result)
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## X              1 7950296 7950296  684586 <2e-16 ***
## Residuals   6920   80364      12                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simple linear regression of Y on X; the fitted model is reused by the
# assumption checks further down. summary() reports coefficients and fit.
regresi1 <- lm(formula = Y ~ X, data = data)
summary(regresi1)
## 
## Call:
## lm(formula = Y ~ X, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.2979  -0.2541  -0.0885  -0.0316  31.4614 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.065759   0.041375   1.589    0.112    
## X           0.985090   0.001191 827.397   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.408 on 6920 degrees of freedom
## Multiple R-squared:   0.99,  Adjusted R-squared:   0.99 
## F-statistic: 6.846e+05 on 1 and 6920 DF,  p-value: < 2.2e-16
# Assumption checks for the fitted model regresi1

# Heteroskedasticity check (single-predictor model): Breusch-Pagan test.
# Decision rule: a p-value above alpha means constant error variance is not
# rejected, i.e. the homoskedasticity assumption is considered met.
bptest(regresi1)
## 
##  studentized Breusch-Pagan test
## 
## data:  regresi1
## BP = 361.9, df = 1, p-value < 2.2e-16
# Autocorrelation check (single-predictor model): Durbin-Watson test.
# Decision rule: a p-value above alpha means no evidence of autocorrelation
# in the residuals, i.e. the independence assumption is considered met.
dwtest(regresi1)
## 
##  Durbin-Watson test
## 
## data:  regresi1
## DW = 1.9812, p-value = 0.2175
## alternative hypothesis: true autocorrelation is greater than 0
# Normality check of the residuals (single-predictor model).
# Decision rule: a p-value above alpha means normality is not rejected.
# shapiro.test() only accepts 3 to 5000 observations, so for any other
# sample size the test is skipped and a notice is printed instead.
n_residuals <- length(residuals(regresi1))  # evaluated once, not twice
# `&&` is the scalar, short-circuiting operator meant for `if` conditions;
# the elementwise `&` is for building logical vectors.
if (n_residuals >= 3 && n_residuals <= 5000) {
  shapiro.test(residuals(regresi1))
} else {
  cat("Ukuran sampel di luar rentang yang diizinkan untuk uji normalitas.")
}
## Ukuran sampel di luar rentang yang diizinkan untuk uji normalitas.
# Draw the standard diagnostic plots for the fitted regression model.
plot(x = regresi1)