######### Analisis Regresi Sederhana ##########
# =========================
# 1. INPUT DATA
# =========================
# Advertising cost (predictor) and sales (response); the two vectors are
# paired by position.
iklan <- c(10, 9, 11, 12, 11, 12, 13, 13, 14, 15)
penjualan <- c(44, 40, 42, 46, 48, 52, 54, 58, 56, 60)

# Combine both vectors into one data frame with explicitly named columns.
data <- data.frame(iklan = iklan, penjualan = penjualan)

# Scatterplot 
# ========================= #
#  INSTALL & LOAD PACKAGE   #
# ========================= #
# install.packages("ggplot2")  # jalankan sekali saja

library("ggplot2")

# Scatterplot of sales against advertising cost, overlaid with a fitted
# straight line (method = "lm") and its confidence band (se = TRUE).
ggplot(data = data, mapping = aes(x = iklan, y = penjualan)) +
  geom_point(color = "#2E86C1", size = 4) +
  geom_smooth(
    method = "lm",
    se = TRUE,
    linewidth = 1.2,
    color = "#E74C3C",
    fill = "#FADBD8"
  ) +
  ggtitle(
    label = "Hubungan Iklan terhadap Penjualan",
    subtitle = "Regresi Linear Sederhana"
  ) +
  xlab("Biaya Iklan") +
  ylab("Penjualan") +
  theme_minimal(base_size = 12) +
  theme(
    axis.title = element_text(face = "bold"),
    plot.subtitle = element_text(size = 10),
    plot.title = element_text(face = "bold", size = 12)
  )
## `geom_smooth()` using formula = 'y ~ x'

# Interpretation: check whether the point cloud rises, falls, or shows no
# regular pattern.
# A rising or falling pattern suggests a linear relationship between x and y;
# an irregular pattern suggests there is no linear relationship.

# =========================
# 2. REGRESI LINEAR SEDERHANA
# =========================

# Fit the simple linear regression of sales (penjualan) on advertising cost
# (iklan) and display the coefficient table and fit statistics.
model <- lm(formula = penjualan ~ iklan, data = data)
summary(model)
## 
## Call:
## lm(formula = penjualan ~ iklan, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4667 -0.9500  0.5333  1.4167  4.4667 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.6000     6.3323   1.200 0.264397    
## iklan         3.5333     0.5223   6.765 0.000143 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.861 on 8 degrees of freedom
## Multiple R-squared:  0.8512, Adjusted R-squared:  0.8326 
## F-statistic: 45.77 on 1 and 8 DF,  p-value: 0.0001428

# Coefficient of determination, extracted from the summary object.
R2 <- summary(model)[["r.squared"]]
R2
## [1] 0.8512121
# Fitted regression model: y = 7.6 + 3.53x
# Interpretation of b0: when the advertising cost is zero, sales are 7.6.
# Interpretation of b1: when the advertising cost rises by one unit, sales
# increase by 3.53.

# =========================
# 3. UJI ASUMSI
# =========================

# 3.1 Residual normality test
# Hypotheses:
#   H0: the residuals are normally distributed
#   H1: the residuals are not normally distributed

# Shapiro-Wilk (used when n < 30). The test is run a single time; repeating
# the identical call only duplicates the output.
shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.9452, p-value = 0.6121

# Kolmogorov-Smirnov (intended for n >= 30; shown here for comparison only).
# NOTE: the mean and sd passed to pnorm are estimated from the very residuals
# being tested. ?ks.test states that the parameters of the reference
# distribution must be pre-specified rather than estimated from the data, so
# this p-value is only approximate; a Lilliefors-corrected test would be the
# strict choice.
ks.test(residuals(model), "pnorm", mean(residuals(model)), sd(residuals(model)))
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  residuals(model)
## D = 0.16869, p-value = 0.8951
## alternative hypothesis: two-sided

# 3.2 Homoscedasticity test (Breusch-Pagan)
#
# Purpose: find out whether the residual variance is the same at every level
# of X.
# Hypotheses:
#   H0: the residuals are homogeneous (constant variance)
#   H1: the residuals are not homogeneous

# install.packages("lmtest")   # run once only
library("lmtest")
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# studentize = TRUE spells out the variant reported in the output header.
bptest(model, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 0.0023414, df = 1, p-value = 0.9614

# 3.3 Autocorrelation test (Durbin-Watson)
#
# Usually carried out only when the data are ordered in time.
# Purpose: see whether the residual of observation i is correlated with the
# residuals of other observations.
# Hypotheses:
#   H0: there is no autocorrelation
#   H1: there is autocorrelation

# install.packages("car")  # run once only
library("car")
## Loading required package: carData

# max.lag = 1 matches the single lag shown in the output.
# NOTE(review): the p-value appears to be obtained by simulation, so it may
# vary slightly between runs -- confirm in ?durbinWatsonTest.
durbinWatsonTest(model, max.lag = 1)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1780041      1.621113   0.282
##  Alternative hypothesis: rho != 0

# =========================
# 4. VISUALISASI
# =========================
# Draw the lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards so that later base-graphics plots
# are not drawn into a leftover 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)

# Q-Q Residuals: if the residuals follow a straight line, the normality
#   assumption is satisfied.
# Residuals vs Fitted: if the points show no particular pattern, the
#   residuals are taken to be homogeneous (no heteroscedasticity) and the
#   linear form is adequate.
# Residuals vs Leverage: highlights influential observations (large residual
#   combined with high leverage, judged against the Cook's distance
#   contours). It does not assess autocorrelation; use the Durbin-Watson
#   test for that.



##################################
#### Regresi Linear Berganda #####
##################################

# Business data set: 12 observations of 6 numeric variables. The values are
# entered row by row (one line per observation), mirroring the printed table.
data_bisnis <- as.data.frame(
  matrix(
    c(
      520, 50, 20, 200, 78, 72,
      610, 65, 25, 250, 85, 80,
      580, 60, 23, 230, 82, 78,
      450, 40, 18, 180, 70, 65,
      700, 80, 30, 300, 90, 88,
      640, 70, 27, 270, 88, 84,
      500, 45, 19, 190, 75, 70,
      720, 85, 32, 320, 92, 90,
      680, 75, 29, 290, 89, 86,
      560, 55, 22, 210, 80, 75,
      590, 58, 24, 240, 83, 79,
      630, 68, 26, 260, 87, 83
    ),
    ncol = 6,
    byrow = TRUE,
    dimnames = list(
      NULL,
      c("Sales", "Promotion", "Staff", "StoreSize", "CustomerSat", "Loyalty")
    )
  )
)

data_bisnis
##    Sales Promotion Staff StoreSize CustomerSat Loyalty
## 1    520        50    20       200          78      72
## 2    610        65    25       250          85      80
## 3    580        60    23       230          82      78
## 4    450        40    18       180          70      65
## 5    700        80    30       300          90      88
## 6    640        70    27       270          88      84
## 7    500        45    19       190          75      70
## 8    720        85    32       320          92      90
## 9    680        75    29       290          89      86
## 10   560        55    22       210          80      75
## 11   590        58    24       240          83      79
## 12   630        68    26       260          87      83
# Keep only Sales and the three predictor columns plotted against it.
df <- data_bisnis[c("Sales", "Promotion", "Staff", "StoreSize")]

# Scatterplot matrix of every pair of the selected columns.
pairs(
  df,
  main = "Pairwise Plot: Sales vs Predictors",
  col = "steelblue",
  pch = 19
)

# Scatterplots of Sales (y) against each predictor (x)
library(ggplot2)

# Build one "Sales vs <predictor>" scatterplot with a fitted straight line
# and its confidence band.
#   data:      data frame holding a `Sales` column and the predictor column
#   predictor: name of the predictor column, as a single string
# Returns the ggplot object; called at top level it is drawn like any other
# ggplot expression. One helper replaces three copies of the same plot code,
# so the styling cannot drift between the plots.
plot_sales_vs <- function(data, predictor) {
  ggplot(data, aes(x = .data[[predictor]], y = Sales)) +
    geom_point(size = 3, alpha = 0.85) +
    geom_smooth(method = "lm", se = TRUE) +
    theme_minimal(base_size = 13) +
    labs(title = paste("Sales vs", predictor), x = predictor, y = "Sales")
}

plot_sales_vs(df, "Promotion")
## `geom_smooth()` using formula = 'y ~ x'

plot_sales_vs(df, "Staff")
## `geom_smooth()` using formula = 'y ~ x'

plot_sales_vs(df, "StoreSize")
## `geom_smooth()` using formula = 'y ~ x'

# Multiple linear regression: Sales explained by Promotion, Staff and
# StoreSize. This reassigns `model`, so the tests below refer to this fit.
model <- lm(
  formula = Sales ~ Promotion + Staff + StoreSize,
  data = data_bisnis
)
summary(model)
## 
## Call:
## lm(formula = Sales ~ Promotion + Staff + StoreSize, data = data_bisnis)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.1250  -0.6046   0.0464   3.0067  14.7174 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 201.2062    31.5305   6.381 0.000213 ***
## Promotion     4.1824     1.9244   2.173 0.061495 .  
## Staff        13.7017    12.4941   1.097 0.304703    
## StoreSize    -0.8223     1.0305  -0.798 0.447930    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.659 on 8 degrees of freedom
## Multiple R-squared:   0.99,  Adjusted R-squared:  0.9862 
## F-statistic:   263 on 3 and 8 DF,  p-value: 2.487e-08
# Assumption checks

# 1. Residual normality
# Shapiro-Wilk (used when n < 30)
shapiro.test(residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.90637, p-value = 0.1916

# Kolmogorov-Smirnov (intended for n >= 30). The reference distribution is a
# normal whose mean and sd are passed by name through to pnorm.
# NOTE(review): both parameters are estimated from the residuals under test;
# ?ks.test asks for pre-specified parameters -- confirm the p-value is read
# with that in mind.
ks.test(
  residuals(model),
  "pnorm",
  mean = mean(residuals(model)),
  sd = sd(residuals(model))
)
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  residuals(model)
## D = 0.24177, p-value = 0.4181
## alternative hypothesis: two-sided
# 2. Homoscedasticity test (Breusch-Pagan)
# Purpose: find out whether the residual variance is the same at every level
# of X.
# Hypotheses:
#   H0: the residuals are homogeneous (constant variance)
#   H1: the residuals are not homogeneous
# install.packages("lmtest")   # run once only
library("lmtest")

# studentize = TRUE spells out the variant reported in the output header.
bptest(model, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 8.8567, df = 3, p-value = 0.03126

# 3. Multicollinearity check (variance inflation factors)
# vif() comes from the car package. Attach it here so this step does not
# depend on a library(car) call made in a different part of the script.
# install.packages("car")  # run once only
library(car)
vif(model)
## Promotion     Staff StoreSize 
##  83.87217 362.94793 253.81302
# Rule of thumb used here: VIF > 10 -> multicollinearity is present
# (all three predictors exceed it in the output above).

# 4. Autocorrelation test (Durbin-Watson)

# Usually carried out only when the data are ordered in time.
# Purpose: see whether the residual of observation i is correlated with the
# residuals of other observations.
# Hypotheses:
#   H0: there is no autocorrelation
#   H1: there is autocorrelation
# install.packages("car")  # run once only

library("car")

# max.lag = 1 matches the single lag shown in the output.
# NOTE(review): the p-value appears to be obtained by simulation, so it may
# vary slightly between runs -- confirm in ?durbinWatsonTest.
durbinWatsonTest(model, max.lag = 1)
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.2211687      2.437308   0.354
##  Alternative hypothesis: rho != 0