library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Load the sales dataset. read.csv2() expects ';' as the field separator and
# ',' as the decimal mark.
# The path literal previously ended with a stray space after ".csv"; Windows
# silently ignores trailing spaces in file names, but other platforms do not,
# so the space is removed here.
# NOTE(review): this is still an absolute, machine-specific path.
data <- read.csv2(
  "C:/Users/Hype AMD/OneDrive/Documents/Dataset Analisis Regresi2.csv"
)

# Preview the first rows to confirm the columns were parsed as expected.
cat("Data awal:\n")
## Data awal:
head(data)
##   No Tanggal Kode.Pelanggan Nama.Pelanggan Kode.Produk   Nama.Produk Qty
## 1  1  01-Aug          NF-01     Toko Nofri        B-05  Besi 5 Meter 340
## 2  2  01-Aug          AN-02     Toko Anton        B-10 Besi 10 Meter 140
## 3  3  01-Aug          CT-03   Toko Central         P-5  Pipa 5 Meter 560
## 4  4  02-Aug          CT-03   Toko Central        P-10 Pipa 10 Meter 230
## 5  5  02-Aug          AN-02     Toko Anton        B-05  Besi 5 Meter 770
## 6  6  02-Aug          NF-01     Toko Nofri        B-10 Besi 10 Meter 780
##   Harga.Satuan Total.Penjualan Diskon Penjualan.Bersih
## 1       200000        68000000    0.1         61200000
## 2       375000        52500000      0         52500000
## 3       100000        56000000    0.2         44800000
## 4       185000        42550000    0.1         38295000
## 5       200000       154000000    0.2        123200000
## 6       375000       292500000    0.2        234000000
# Coerce the four modelling variables to numeric *in place*. The NA check,
# the correlation matrix and lm() all read the dotted column names, so these
# are the columns that must be guaranteed numeric.
data$Qty <- as.numeric(data$Qty)
data$Harga.Satuan <- as.numeric(data$Harga.Satuan)
data$Diskon <- as.numeric(data$Diskon)
data$Penjualan.Bersih <- as.numeric(data$Penjualan.Bersih)
# Space-named copies are kept (same order as before) because later parts of
# the script still refer to them.
data$`Harga Satuan` <- data$Harga.Satuan
data$`Penjualan Bersih` <- data$Penjualan.Bersih
# Report the number of missing values in each modelling variable.
cat("\nCek Missing Value:\n")
## 
## Cek Missing Value:
colSums(is.na(data[c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")]))
##              Qty     Harga.Satuan           Diskon Penjualan.Bersih 
##                0                0                0                0
# Drop every row that has an NA in any column of the data frame.
data <- stats::na.omit(data)

# Re-check all columns to confirm nothing is missing any more.
cat("\nSetelah hapus missing:\n")
## 
## Setelah hapus missing:
colSums(is.na(data))
##               No          Tanggal   Kode.Pelanggan   Nama.Pelanggan 
##                0                0                0                0 
##      Kode.Produk      Nama.Produk              Qty     Harga.Satuan 
##                0                0                0                0 
##  Total.Penjualan           Diskon Penjualan.Bersih     Harga Satuan 
##                0                0                0                0 
## Penjualan Bersih 
##                0
# Draw one boxplot per modelling variable in a 2 x 2 grid.
# Each panel reads the dotted column that its title names, i.e. the same
# columns the model is fit on. The previous graphics layout is saved and
# restored rather than being forced back to a hard-coded c(1, 1).
local({
  old_par <- par(mfrow = c(2, 2))

  for (col in c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")) {
    boxplot(data[[col]], main = col)
  }

  par(old_par)
})
# Pairwise Pearson correlations between the response and the three
# predictors.
cor_matrix <- cor(
  data[c("Qty", "Harga.Satuan", "Diskon", "Penjualan.Bersih")],
  method = "pearson"
)

cat("\nKorelasi Pearson:\n")
## 
## Korelasi Pearson:
print(cor_matrix)
##                           Qty Harga.Satuan     Diskon Penjualan.Bersih
## Qty               1.000000000 -0.005890328  0.9406988        0.7996069
## Harga.Satuan     -0.005890328  1.000000000 -0.1774788        0.4975503
## Diskon            0.940698849 -0.177478780  1.0000000        0.6595427
## Penjualan.Bersih  0.799606895  0.497550273  0.6595427        1.0000000
# Multiple linear regression of net sales on quantity, unit price and
# discount. The column names are syntactic, so no backticks are required.
model <- lm(
  Penjualan.Bersih ~ Qty + Harga.Satuan + Diskon,
  data = data
)

# Coefficient table, residual summary and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = Penjualan.Bersih ~ Qty + Harga.Satuan + Diskon, 
##     data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -22626630 -15428859   3344685   7043452  47531663 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -6.125e+07  1.633e+07  -3.750  0.00321 **
## Qty           1.932e+05  7.388e+04   2.615  0.02403 * 
## Harga.Satuan  2.867e+02  6.727e+01   4.262  0.00134 **
## Diskon       -5.248e+07  2.418e+08  -0.217  0.83215   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21130000 on 11 degrees of freedom
## Multiple R-squared:  0.8921, Adjusted R-squared:  0.8627 
## F-statistic: 30.32 on 3 and 11 DF,  p-value: 1.292e-05
# Extract the fitted coefficients (intercept first, then one per predictor).
coef_model <- coef(model)

cat("\nPersamaan Regresi:\n")
## 
## Persamaan Regresi:
# Print the fitted equation. Terms are built from the coefficient names, so
# the labels always match the model, and negative slopes are written as
# "- value" instead of "+ -value".
local({
  beta <- round(coef_model, 3)
  slope <- beta[-1]
  # Format one number at a time: format() on a whole vector would pad every
  # element to a common width and number of decimals.
  fmt <- function(v) format(v, digits = 7)
  terms <- paste0(
    ifelse(slope < 0, "- ", "+ "),
    vapply(abs(unname(slope)), fmt, character(1)),
    " * ", names(slope)
  )
  cat(paste0(
    "Y = ", fmt(unname(beta[1])), " ", paste(terms, collapse = " "), "\n"
  ))
})
## Y = -61245802 + 193210.9 * Qty + 286.68 * Harga.Satuan - 52477049 * Diskon
# Regression diagnostics on the fitted model.

# Shapiro-Wilk normality test applied to the model residuals.
# Recorded p-value is 0.065, so normality is not rejected at the 5% level.
shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.88909, p-value = 0.06496
# Variance inflation factor for each predictor (multicollinearity check).
# Recorded values for Qty and Diskon are both above 10, a commonly used
# warning threshold; this is consistent with their 0.94 correlation.
car::vif(model)
##          Qty Harga.Satuan       Diskon 
##    11.324972     1.345731    11.692890
# Studentized Breusch-Pagan test (heteroskedasticity check).
lmtest::bptest(model)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 6.8423, df = 3, p-value = 0.0771
# Durbin-Watson test; as the output states, the alternative hypothesis is
# autocorrelation greater than 0.
lmtest::dwtest(model)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 2.2322, p-value = 0.6905
## alternative hypothesis: true autocorrelation is greater than 0
# In-sample predictions: predict() without new data returns the fitted
# values for the rows the model was trained on.
data$prediksi <- predict(model)

# Error metrics are computed against Penjualan.Bersih, the exact column the
# model was fit on, rather than a separately created copy of it.
rmse <- with(data, sqrt(mean((Penjualan.Bersih - prediksi)^2)))
mae <- with(data, mean(abs(Penjualan.Bersih - prediksi)))
# MAPE is undefined where the actual value is 0, so such rows are excluded
# instead of letting a single zero turn the whole metric into Inf.
mape <- with(
  data[data$Penjualan.Bersih != 0, ],
  mean(abs((Penjualan.Bersih - prediksi) / Penjualan.Bersih))
) * 100

cat("\nEvaluasi Model:\n")
## 
## Evaluasi Model:
cat("RMSE =", rmse, "\n")
## RMSE = 18097768
cat("MAE =", mae, "\n")
## MAE = 14104698
cat("MAPE =", mape, "%\n")
## MAPE = 31.59128 %
# Line chart of actual vs. predicted net sales by observation index.
# seq_along() replaces 1:nrow(data), which would yield c(1, 0) for an empty
# data frame; it also derives the index from the plotted column itself.
ggplot(data, aes(x = seq_along(prediksi))) +
  geom_line(aes(y = Penjualan.Bersih, colour = "Aktual")) +
  geom_line(aes(y = prediksi, colour = "Prediksi")) +
  labs(
    title = "Aktual vs Prediksi",
    x = "Observasi",
    y = "Penjualan"
  ) +
  scale_colour_manual(values = c("Aktual" = "blue", "Prediksi" = "red"))

# Residuals against fitted values, with a horizontal reference line at zero.
ggplot(data, aes(prediksi, resid(model))) +
  geom_point() +
  geom_hline(yintercept = 0) +
  ggtitle("Scatter Residual") +
  xlab("Prediksi") +
  ylab("Residual")