# Raw observations ----
# Ten observations per variable. `price` and `income` are evenly spaced
# (steps of 10 and 200 respectively), so they are generated with seq();
# the remaining series are irregular and are listed literally.
price <- seq(from = 100, to = 190, by = 10)
marketing_cost <- c(20, 25, 22, 30, 35, 40, 42, 45, 50, 55)
stores <- c(50, 52, 55, 58, 60, 62, 65, 67, 70, 72)
income <- seq(from = 5000, to = 6800, by = 200)
sales_volume <- c(200, 220, 210, 230, 240, 250, 260, 270, 280, 290)

# Assemble the data frame ----
# Columns are named explicitly; the names match the vectors above.
data <- data.frame(
  price = price,
  marketing_cost = marketing_cost,
  stores = stores,
  income = income,
  sales_volume = sales_volume
)
# Show the assembled table (auto-printed at top level).
data
## price marketing_cost stores income sales_volume
## 1 100 20 50 5000 200
## 2 110 25 52 5200 220
## 3 120 22 55 5400 210
## 4 130 30 58 5600 230
## 5 140 35 60 5800 240
## 6 150 40 62 6000 250
## 7 160 42 65 6200 260
## 8 170 45 67 6400 270
## 9 180 50 70 6600 280
## 10 190 55 72 6800 290
# Multiple linear regression ----
# `income` is deliberately left out of the formula: in this data set it is an
# exact linear function of `price` (income = 3000 + 20 * price), so including
# it makes the design matrix rank-deficient and lm() reports its coefficient
# as NA ("1 not defined because of singularities"). Dropping the aliased term
# leaves the fitted values, residuals and the remaining coefficients unchanged.
model <- lm(sales_volume ~ price + marketing_cost + stores, data = data)
# Model summary
summary(model)
##
## Call:
## lm(formula = sales_volume ~ price + marketing_cost + stores,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5778 -1.5081 -0.0229 1.4289 4.4748
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 172.6154 83.1662 2.076 0.0832 .
## price 0.5164 0.9521 0.542 0.6071
## marketing_cost 1.9689 0.5688 3.462 0.0134 *
## stores -1.2139 3.4233 -0.355 0.7350
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.059 on 6 degrees of freedom
## Multiple R-squared: 0.9932, Adjusted R-squared: 0.9898
## F-statistic: 291.8 on 3 and 6 DF, p-value: 6.882e-07
# Visualization ----
# Pairwise scatterplots of every column in `data`
pairs(data, main = "Scatterplot Matrix", pch = 19)

# Residuals versus fitted values
# The x-axis is labelled "Fitted Values", so the fitted values are passed
# explicitly as the x coordinate; plotting the residual vector alone would
# put the observation index on the x-axis instead.
plot(
  fitted(model), residuals(model),
  main = "Residual Plot",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
# Horizontal reference line at zero residual
abline(h = 0, col = "red", lwd = 2)
# New observation to predict for ----
# A single hypothetical scenario supplying a value for each of price,
# marketing_cost, stores and income.
new_data <- data.frame(
  price = 175,
  marketing_cost = 48,
  stores = 68,
  income = 6500
)

# Predicted sales volume for that scenario
predicted_sales <- predict(object = model, newdata = new_data)
print(predicted_sales)
## 1
## 274.9569