library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Load the sales dataset. read.csv2() expects ';' as the field separator and
# ',' as the decimal mark.
# The previous path literal ended with a stray blank after ".csv"; that blank
# is not part of the file name and can make the lookup fail, so it is removed.
# NOTE(review): the path is machine-specific -- adjust it before running on
# another computer.
data_path <- "C:/Users/Hype AMD/OneDrive/Documents/Dataset Analisis Regresi2.csv"
if (!file.exists(data_path)) {
  stop("Dataset not found: ", data_path, call. = FALSE)
}
data <- read.csv2(data_path)
cat("Data awal:\n")
## Data awal:
# Preview the first six rows to confirm the columns were parsed as expected.
head(data)
## No Tanggal Kode.Pelanggan Nama.Pelanggan Kode.Produk Nama.Produk Qty
## 1 1 01-Aug NF-01 Toko Nofri B-05 Besi 5 Meter 340
## 2 2 01-Aug AN-02 Toko Anton B-10 Besi 10 Meter 140
## 3 3 01-Aug CT-03 Toko Central P-5 Pipa 5 Meter 560
## 4 4 02-Aug CT-03 Toko Central P-10 Pipa 10 Meter 230
## 5 5 02-Aug AN-02 Toko Anton B-05 Besi 5 Meter 770
## 6 6 02-Aug NF-01 Toko Nofri B-10 Besi 10 Meter 780
## Harga.Satuan Total.Penjualan Diskon Penjualan.Bersih
## 1 200000 68000000 0.1 61200000
## 2 375000 52500000 0 52500000
## 3 100000 56000000 0.2 44800000
## 4 185000 42550000 0.1 38295000
## 5 200000 154000000 0.2 123200000
## 6 375000 292500000 0.2 234000000
# Coerce every column used by the correlation and regression steps to numeric
# IN PLACE. Previously only Qty and Diskon were converted in place, while
# Harga.Satuan and Penjualan.Bersih were merely copied into new space-named
# columns, so the columns that are actually checked and modelled were never
# coerced. as.numeric() yields NA (with a warning) for unparseable values,
# which the missing-value check below then reports.
num_cols <- c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")
data[num_cols] <- lapply(data[num_cols], as.numeric)
# Space-named aliases are kept because other sections of the script still
# read them; they now mirror the coerced dotted columns exactly.
data$`Harga Satuan` <- data$Harga.Satuan
data$`Penjualan Bersih` <- data$Penjualan.Bersih
cat("\nCek Missing Value:\n")
##
## Cek Missing Value:
# Count missing values per model column (after coercion).
colSums(is.na(data[, num_cols]))
## Qty Harga.Satuan Diskon Penjualan.Bersih
## 0 0 0 0
# Drop every row that has an NA in any column of the data frame.
data <- na.omit(data)
cat("\nSetelah hapus missing:\n")
##
## Setelah hapus missing:
colSums(is.na(data))
## No Tanggal Kode.Pelanggan Nama.Pelanggan
## 0 0 0 0
## Kode.Produk Nama.Produk Qty Harga.Satuan
## 0 0 0 0
## Total.Penjualan Diskon Penjualan.Bersih Harga Satuan
## 0 0 0 0
## Penjualan Bersih
## 0
# Box plots of the four model variables in a 2 x 2 grid, for a quick visual
# outlier check. Each plot reads the same dotted column that its title shows
# (and that the model uses) instead of the space-named aliases.
# par() returns the previous setting, which is restored afterwards rather
# than assuming the layout was 1 x 1 before.
old_par <- par(mfrow = c(2, 2))
for (box_col in c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")) {
  boxplot(data[[box_col]], main = box_col)
}

par(old_par)
# Pairwise Pearson correlations between the three predictors and the response.
cor_matrix <- cor(
  data[c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")],
  method = "pearson"
)
cat("\nKorelasi Pearson:\n")
##
## Korelasi Pearson:
print(cor_matrix)
## Qty Harga.Satuan Diskon Penjualan.Bersih
## Qty 1.000000000 -0.005890328 0.9406988 0.7996069
## Harga.Satuan -0.005890328 1.000000000 -0.1774788 0.4975503
## Diskon 0.940698849 -0.177478780 1.0000000 0.6595427
## Penjualan.Bersih 0.799606895 0.497550273 0.6595427 1.0000000
# Ordinary least squares: net sales explained by quantity, unit price and
# discount. The column names are syntactic, so no backticks are needed; the
# call is otherwise kept in the same shape so the echoed "Call:" is unchanged.
model <- lm(Penjualan.Bersih ~ Qty + Harga.Satuan + Diskon, data = data)
# Coefficient table, residual quantiles, R-squared and overall F-test.
print(summary(model))
##
## Call:
## lm(formula = Penjualan.Bersih ~ Qty + Harga.Satuan + Diskon,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22626630 -15428859 3344685 7043452 47531663
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.125e+07 1.633e+07 -3.750 0.00321 **
## Qty 1.932e+05 7.388e+04 2.615 0.02403 *
## Harga.Satuan 2.867e+02 6.727e+01 4.262 0.00134 **
## Diskon -5.248e+07 2.418e+08 -0.217 0.83215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21130000 on 11 degrees of freedom
## Multiple R-squared: 0.8921, Adjusted R-squared: 0.8627
## F-statistic: 30.32 on 3 and 11 DF, p-value: 1.292e-05
# Print the fitted regression equation in readable form.
# coef_model is ordered as: intercept, Qty, Harga.Satuan, Diskon.
coef_model <- coef(model)
cat("\nPersamaan Regresi:\n")
##
## Persamaan Regresi:
coef_rounded <- round(coef_model, 3)
term_labels <- c("*Qty", "*Harga", "*Diskon")
cat("Y =", coef_rounded[1])
for (i in seq_along(term_labels)) {
  slope <- coef_rounded[i + 1]
  # Emit the sign as the joining operator and print the magnitude, so a
  # negative slope reads "- 52477049" instead of the malformed "+ -52477049".
  # isTRUE() keeps an NA coefficient from breaking the condition.
  cat("", if (isTRUE(slope < 0)) "-" else "+", abs(slope), term_labels[i])
}
cat("\n")
## Y = -61245802 + 193210.9 *Qty + 286.68 *Harga - 52477049 *Diskon
# Classical regression assumption checks on the fitted model. The call
# expressions are left exactly as written because each test echoes its
# argument expression in the "data:" line of its output.

# Normality of the residuals (Shapiro-Wilk).
shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.88909, p-value = 0.06496
# Multicollinearity: variance inflation factor for each predictor.
car::vif(model)
## Qty Harga.Satuan Diskon
## 11.324972 1.345731 11.692890
# Heteroskedasticity: studentized Breusch-Pagan test.
lmtest::bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 6.8423, df = 3, p-value = 0.0771
# Autocorrelation of the residuals: Durbin-Watson test. As the echoed output
# shows, the default alternative is one-sided (autocorrelation > 0).
lmtest::dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 2.2322, p-value = 0.6905
## alternative hypothesis: true autocorrelation is greater than 0
# In-sample accuracy of the fitted model.
# The fitted values are stored on the data frame because the plots reuse them.
data$prediksi <- predict(model)
# Compare against the same response column the model was fitted on
# (Penjualan.Bersih) instead of its space-named alias, and compute the
# prediction error once rather than three times.
y_actual <- data$Penjualan.Bersih
y_error <- y_actual - data$prediksi
rmse <- sqrt(mean(y_error^2))
mae <- mean(abs(y_error))
# MAPE is undefined where the actual value is zero; leave those rows out so a
# single zero cannot turn the result into Inf/NaN.
has_actual <- y_actual != 0
mape <- mean(abs(y_error[has_actual] / y_actual[has_actual])) * 100
cat("\nEvaluasi Model:\n")
##
## Evaluasi Model:
cat("RMSE =", rmse, "\n")
## RMSE = 18097768
cat("MAE =", mae, "\n")
## MAE = 14104698
cat("MAPE =", mape, "%\n")
## MAPE = 31.59128 %
# Line chart of actual vs. fitted net sales by observation order.
# seq_along(prediksi) is evaluated inside the plot data, which replaces the
# fragile 1:nrow(data) (it would yield c(1, 0) on an empty data frame) and
# removes the reference to the global `data` from inside aes().
ggplot(data, aes(x = seq_along(prediksi))) +
  geom_line(aes(y = Penjualan.Bersih, colour = "Aktual")) +
  geom_line(aes(y = prediksi, colour = "Prediksi")) +
  labs(
    title = "Aktual vs Prediksi",
    x = "Observasi",
    y = "Penjualan"
  ) +
  scale_colour_manual(values = c("Aktual" = "blue", "Prediksi" = "red"))

# Residuals vs. fitted values, to eyeball non-constant variance or curvature.
# The residuals are placed in the plotted data frame instead of being passed
# to aes() as an external vector, so both aesthetics come from the same rows.
residual_df <- data.frame(
  prediksi = data$prediksi,
  residual = residuals(model)
)
ggplot(residual_df, aes(x = prediksi, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  labs(title = "Scatter Residual", x = "Prediksi", y = "Residual")
