#Read Data
# MEMUAT PACKAGE DAN DATA
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.2
library(mice)
## Warning: package 'mice' was built under R version 4.5.2
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(visdat)
## Warning: package 'visdat' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Set seed untuk reproduksibilitas
set.seed(123)
# Muat data
data <- read.csv("C:/Users/ASUS/Downloads/AnReg/best sellin books 2023.csv")
head(data)
## id Book.name
## 1 #1 Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones
## 2 #2 Iron Flame (Standard Edition) (The Empyrean, 2)
## 3 #3 Spare
## 4 #4 Fourth Wing
## 5 #5 The Woman in Me
## 6 #6 Lessons in Chemistry: A Novel
## Author Rating reviews.count form
## 1 James Clear 4.8 out of 5 stars 145747 Hardcover
## 2 Rebecca Yarros 4.7 out of 5 stars 395512 Hardcover
## 3 Prince Harry The Duke of Sussex 4.5 out of 5 stars 116101 Hardcover
## 4 Rebecca Yarros 4.8 out of 5 stars 472618 Paperback
## 5 Britney Spears 4.4 out of 5 stars 51520 Hardcover
## 6 Bonnie Garmus 4.6 out of 5 stars 322650 Hardcover
## price Reading.age Print.Length Publishing.date Genre
## 1 $18.88 320 16/10/2018 Self-Improvement
## 2 $11.05 640 07/11/2023 Fiction & Action & Adventure
## 3 $11.99 416 January 10, 2023 Biographies & Memoirs
## 4 $13.62 544 17/09/2024 Fiction & Action & Adventure
## 5 $11.37 288 October 24, 2023 Biographies & Memoirs
## 6 $13.94 400 April 5, 2022 Fiction & Action & Adventure
#Missing Data
# Ringkasan missing values
analyze_missing_values <- function(data) {
cat(" ANALISIS DATA HILANG \n\n")
# Total missing values
total_missing <- sum(is.na(data))
cat("Total nilai hilang:", total_missing, "\n")
cat("Persentase nilai hilang:", round(total_missing / (nrow(data) * ncol(data)) * 100, 2), "%\n\n")
# Missing values per variabel
missing_per_var <- sapply(data, function(x) sum(is.na(x)))
missing_df <- data.frame(
Variable = names(missing_per_var),
Missing_Count = missing_per_var,
Missing_Percentage = round(missing_per_var / nrow(data) * 100, 2)
) %>%
arrange(desc(Missing_Count))
print("Missing values per variabel:")
print(missing_df)
# Pattern missing values
cat("\nPattern data hilang (5 observasi pertama):\n")
print(head(md.pattern(data, plot = FALSE), 5))
# Visualisasi missing values
par(mfrow = c(1, 2))
# Plot 1: Aggregation plot
tryCatch({
aggr_plot <- aggr(data, col = c('navyblue', 'red'),
numbers = TRUE, sortVars = TRUE,
labels = names(data), cex.axis = 0.7,
gap = 3, ylab = c("Histogram data hilang", "Pattern"))
}, error = function(e) {
cat("Gagal membuat plot aggr, menggunakan alternatif...\n")
})
# Plot 2: Heatmap missing values
vis_miss(data, warn_large_data = FALSE) +
theme_minimal() +
labs(title = "Heatmap Data Hilang")
par(mfrow = c(1, 1))
return(missing_df)
}
# Jalankan analisis missing values
missing_analysis <- analyze_missing_values(data)
## ANALISIS DATA HILANG
##
## Total nilai hilang: 2
## Persentase nilai hilang: 0.18 %
##
## [1] "Missing values per variabel:"
## Variable Missing_Count Missing_Percentage
## Print.Length Print.Length 2 2
## id id 0 0
## Book.name Book.name 0 0
## Author Author 0 0
## Rating Rating 0 0
## reviews.count reviews.count 0 0
## form form 0 0
## price price 0 0
## Reading.age Reading.age 0 0
## Publishing.date Publishing.date 0 0
## Genre Genre 0 0
##
## Pattern data hilang (5 observasi pertama):
## id Book.name Author Rating reviews.count form price Reading.age
## 98 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0
## Publishing.date Genre Print.Length
## 98 1 1 1 0
## 2 1 1 0 1
## 0 0 2 2
## Gagal membuat plot aggr, menggunakan alternatif...
#Penanganan Data Hilang
# Metode penanganan data hilang
handle_missing_values <- function(data, method = "mean") {
cat("\n PENANGANAN DATA HILANG \n")
cat("Metode yang digunakan:", method, "\n\n")
data_clean <- data
# Identifikasi variabel numerik dan kategorikal
numeric_vars <- sapply(data_clean, is.numeric)
categorical_vars <- sapply(data_clean, function(x) is.character(x) | is.factor(x))
# Tangani missing values untuk variabel numerik
for(var in names(data_clean)[numeric_vars]) {
na_count <- sum(is.na(data_clean[[var]]))
if(na_count > 0) {
cat("Variabel", var, "memiliki", na_count, "nilai hilang\n")
if(method == "mean") {
# Imputasi dengan mean
mean_val <- mean(data_clean[[var]], na.rm = TRUE)
data_clean[[var]][is.na(data_clean[[var]])] <- mean_val
cat(" Diimputasi dengan mean:", round(mean_val, 2), "\n")
} else if(method == "median") {
# Imputasi dengan median
median_val <- median(data_clean[[var]], na.rm = TRUE)
data_clean[[var]][is.na(data_clean[[var]])] <- median_val
cat(" Diimputasi dengan median:", median_val, "\n")
} else if(method == "knn") {
# Imputasi dengan KNN (sederhana)
require(VIM)
temp_data <- data_clean[, numeric_vars]
knn_imputed <- kNN(temp_data, variable = var, k = 5)
data_clean[[var]] <- knn_imputed[[var]]
cat(" Diimputasi dengan KNN (k=5)\n")
} else if(method == "delete") {
# Hapus baris dengan missing values
cat(" Opsi hapus: akan dihapus dalam langkah terpisah\n")
}
}
}
# Tangani missing values untuk variabel kategorikal
for(var in names(data_clean)[categorical_vars]) {
na_count <- sum(is.na(data_clean[[var]]))
if(na_count > 0) {
cat("Variabel kategorikal", var, "memiliki", na_count, "nilai hilang\n")
# Imputasi dengan modus
mode_val <- names(sort(table(data_clean[[var]]), decreasing = TRUE))[1]
data_clean[[var]][is.na(data_clean[[var]])] <- mode_val
cat(" Diimputasi dengan modus:", mode_val, "\n")
}
}
# Opsi: Hapus baris dengan missing values
if(method == "delete") {
rows_before <- nrow(data_clean)
data_clean <- na.omit(data_clean)
rows_after <- nrow(data_clean)
cat("\nDihapus", rows_before - rows_after, "baris dengan missing values\n")
}
# Verifikasi setelah imputasi
cat("\nMissing values setelah penanganan:\n")
print(colSums(is.na(data_clean)))
return(data_clean)
}
# Contoh penggunaan metode imputasi yang berbeda
data_mean_imputed <- handle_missing_values(data, method = "mean")
##
## PENANGANAN DATA HILANG
## Metode yang digunakan: mean
##
## Variabel Print.Length memiliki 2 nilai hilang
## Diimputasi dengan mean: 361.22
##
## Missing values setelah penanganan:
## id Book.name Author Rating reviews.count
## 0 0 0 0 0
## form price Reading.age Print.Length Publishing.date
## 0 0 0 0 0
## Genre
## 0
data_median_imputed <- handle_missing_values(data, method = "median")
##
## PENANGANAN DATA HILANG
## Metode yang digunakan: median
##
## Variabel Print.Length memiliki 2 nilai hilang
## Diimputasi dengan median: 344
##
## Missing values setelah penanganan:
## id Book.name Author Rating reviews.count
## 0 0 0 0 0
## form price Reading.age Print.Length Publishing.date
## 0 0 0 0 0
## Genre
## 0
#Outlier
visualize_outliers <- function(data) {
cat("\n VISUALISASI OUTLIER \n")
# Bersihkan karakter non-numerik
data$price <- as.numeric(gsub("[^0-9.]", "", data$price))
data$reviews.count <- as.numeric(gsub("[^0-9.]", "", data$reviews.count))
# Cek apakah semua NA
if(all(is.na(data$price)) | all(is.na(data$reviews.count))) {
stop("Kolom price atau reviews.count menjadi NA semua setelah konversi. Cek format data!")
}
# Hapus NA supaya plot tidak error
clean_data <- na.omit(data[, c("price", "reviews.count")])
# BOX PLOT
par(mfrow = c(1, 2))
boxplot(clean_data$price,
main = "Boxplot Price",
col = "lightblue",
ylab = "Price",
outline = TRUE)
grid()
boxplot(clean_data$reviews.count,
main = "Boxplot Reviews Count",
col = "lightgreen",
ylab = "Reviews Count",
outline = TRUE)
grid()
par(mfrow = c(1, 1))
# HISTOGRAM
plot_hist_outlier <- function(variable, var_name) {
q1 <- quantile(variable, 0.25)
q3 <- quantile(variable, 0.75)
iqr_val <- IQR(variable)
lower_bound <- q1 - 1.5 * iqr_val
upper_bound <- q3 + 1.5 * iqr_val
ggplot(data.frame(value = variable), aes(x = value)) +
geom_histogram(aes(y = after_stat(density)),
bins = 30,
fill = "lightblue",
alpha = 0.7) +
geom_density(color = "darkblue", linewidth = 1) +
geom_vline(xintercept = c(lower_bound, upper_bound),
color = "red",
linetype = "dashed",
linewidth = 1) +
labs(title = paste("Distribusi & Outlier:", var_name),
x = var_name,
y = "Density") +
theme_minimal()
}
print(plot_hist_outlier(clean_data$price, "Price"))
print(plot_hist_outlier(clean_data$reviews.count, "Reviews Count"))
# ================= SCATTER =================
print(
ggplot(clean_data, aes(x = price, y = reviews.count)) +
geom_point(color = "blue", alpha = 0.6, size = 3) +
labs(title = "Scatter Plot: Price vs Reviews Count",
x = "Price",
y = "Reviews Count") +
theme_minimal()
)
}
# Jalankan
visualize_outliers(data)
##
## VISUALISASI OUTLIER
#Penanganan Outlier
# Metode penanganan outlier
handle_outliers <- function(data, method = "cap") {
cat("\n PENANGANAN OUTLIER \n")
cat("Metode yang digunakan:", method, "\n\n")
data_processed <- data
for(var in names(data_processed)[sapply(data_processed, is.numeric)]) {
# Hitung batas outlier
q1 <- quantile(data_processed[[var]], 0.25, na.rm = TRUE)
q3 <- quantile(data_processed[[var]], 0.75, na.rm = TRUE)
iqr_val <- IQR(data_processed[[var]], na.rm = TRUE)
lower_bound <- q1 - 1.5 * iqr_val
upper_bound <- q3 + 1.5 * iqr_val
# Identifikasi outlier
is_outlier <- data_processed[[var]] < lower_bound | data_processed[[var]] > upper_bound
n_outliers <- sum(is_outlier, na.rm = TRUE)
if(n_outliers > 0) {
cat("Variabel", var, "memiliki", n_outliers, "outlier\n")
if(method == "cap") {
# Cap outliers (Winsorizing)
data_processed[[var]][data_processed[[var]] < lower_bound] <- lower_bound
data_processed[[var]][data_processed[[var]] > upper_bound] <- upper_bound
cat(" Outlier di-cap ke batas [", round(lower_bound, 2), ", ",
round(upper_bound, 2), "]\n", sep = "")
} else if(method == "remove") {
# Hapus outlier (set sebagai NA)
data_processed[[var]][is_outlier] <- NA
cat(" Outlier dihapus (dijadikan NA)\n")
} else if(method == "transform") {
# Transformasi logaritmik
if(all(data_processed[[var]] > 0, na.rm = TRUE)) {
data_processed[[var]] <- log(data_processed[[var]] + 1)
cat(" Dilakukan transformasi log\n")
} else {
cat(" Transformasi log tidak dapat dilakukan (nilai negatif)\n")
}
} else if(method == "median") {
# Ganti dengan median
median_val <- median(data_processed[[var]], na.rm = TRUE)
data_processed[[var]][is_outlier] <- median_val
cat(" Diganti dengan median:", round(median_val, 2), "\n")
}
}
}
return(data_processed)
}
# Contoh penggunaan metode penanganan outlier
data_capped <- handle_outliers(data, method = "cap")
##
## PENANGANAN OUTLIER
## Metode yang digunakan: cap
##
## Variabel reviews.count memiliki 11 outlier
## Outlier di-cap ke batas [-128434.2, 283837.8]
## Variabel Print.Length memiliki 3 outlier
## Outlier di-cap ke batas [-152, 808]
data_no_outliers <- handle_outliers(data, method = "remove")
##
## PENANGANAN OUTLIER
## Metode yang digunakan: remove
##
## Variabel reviews.count memiliki 11 outlier
## Outlier dihapus (dijadikan NA)
## Variabel Print.Length memiliki 3 outlier
## Outlier dihapus (dijadikan NA)
#Analisis Deskriptif dan Visualisasi
# ANALISIS DESKRIPTIF DAN VISUALISASI
print("Statistik Deskriptif:")
## [1] "Statistik Deskriptif:"
summary(data)
## id Book.name Author Rating
## Length:100 Length:100 Length:100 Length:100
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## reviews.count form price Reading.age
## Min. : 3296 Length:100 Length:100 Length:100
## 1st Qu.: 26168 Class :character Class :character Class :character
## Median : 77085 Mode :character Mode :character Mode :character
## Mean :111733
## 3rd Qu.:129236
## Max. :653111
##
## Print.Length Publishing.date Genre
## Min. : 24.0 Length:100 Length:100
## 1st Qu.: 208.0 Class :character Class :character
## Median : 344.0 Mode :character Mode :character
## Mean : 361.2
## 3rd Qu.: 448.0
## Max. :2896.0
## NA's :2
# Scatter plot
ggplot(data, aes(x = price, y = reviews.count)) +
geom_point(color = "blue", size = 3) +
labs(title = "Hubungan price dan reviews.count",
x = "price", y = "review.count") +
theme_minimal()
str(data$price)
## chr [1:100] "$18.88" "$11.05" "$11.99" "$13.62" "$11.37" "$13.94" "$13.99" ...
str(data$reviews.count)
## int [1:100] 145747 395512 116101 472618 51520 322650 87375 82564 228242 19946 ...
# Bersihkan price
data$price <- gsub("\\$", "", data$price)
data$price <- gsub(",", "", data$price)
data$price <- as.numeric(data$price)
# Pastikan reviews.count numeric juga
data$reviews.count <- as.numeric(data$reviews.count)
# Korelasi
cor_test <- cor.test(data$price, data$reviews.count)
print(paste("Korelasi Pearson:", round(cor_test$estimate, 4)))
## [1] "Korelasi Pearson: -0.0837"
print(paste("p-value korelasi:", round(cor_test$p.value, 4)))
## [1] "p-value korelasi: 0.4077"
#Membangun Model
#MEMBANGUN MODEL REGRESI
model <- lm(reviews.count ~ price, data = data)
cat("Ringkasan Model Regresi:\n")
## Ringkasan Model Regresi:
print(summary(model))
##
## Call:
## lm(formula = reviews.count ~ price, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -114972 -86607 -33963 21690 538407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127742 22872 5.585 2.09e-07 ***
## price -1346 1618 -0.832 0.408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 123500 on 98 degrees of freedom
## Multiple R-squared: 0.007007, Adjusted R-squared: -0.003125
## F-statistic: 0.6916 on 1 and 98 DF, p-value: 0.4077
# UJI ASUMSI REGRESI LINEAR
cat("\n UJI ASUMSI REGRESI LINEAR \n")
##
## UJI ASUMSI REGRESI LINEAR
# Normalitas Residual
shapiro_test <- shapiro.test(residuals(model))
cat("\n1. UJI NORMALITAS (Shapiro-Wilk):\n")
##
## 1. UJI NORMALITAS (Shapiro-Wilk):
cat(" Statistik W =", round(shapiro_test$statistic, 4), "\n")
## Statistik W = 0.7791
cat(" p-value =", round(shapiro_test$p.value, 4), "\n")
## p-value = 0
if(shapiro_test$p.value > 0.05) {
cat(" Keputusan: Residual berdistribusi normal\n")
} else {
cat(" Keputusan: Residual tidak berdistribusi normal\n")
}
## Keputusan: Residual tidak berdistribusi normal
# Q-Q Plot
qqnorm(residuals(model), main = "Q-Q Plot Residual")
qqline(residuals(model), col = "red")
# Homoskedastisitas
bp_test <- bptest(model)
cat("\n2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):\n")
##
## 2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):
cat(" Statistik LM =", round(bp_test$statistic, 4), "\n")
## Statistik LM = 0.5164
cat(" p-value =", round(bp_test$p.value, 4), "\n")
## p-value = 0.4724
if(bp_test$p.value > 0.05) {
cat(" Keputusan: Varian residual homogen (tidak ada heteroskedastisitas)\n")
} else {
cat(" Keputusan: Terjadi heteroskedastisitas\n")
}
## Keputusan: Varian residual homogen (tidak ada heteroskedastisitas)
# Plot Residual vs Fitted
plot(fitted(model), residuals(model),
main = "Residual vs Fitted Values",
xlab = "Fitted Values",
ylab = "Residuals",
pch = 19, col = "blue")
abline(h = 0, col = "red", lty = 2)
# UJI AUTOKORELASI (Durbin-Watson)
dw_test <- dwtest(model)
cat("\n3. UJI AUTOKORELASI (Durbin-Watson):\n")
##
## 3. UJI AUTOKORELASI (Durbin-Watson):
cat(" Statistik DW =", round(dw_test$statistic, 4), "\n")
## Statistik DW = 1.753
cat(" p-value =", round(dw_test$p.value, 4), "\n")
## p-value = 0.105
if(dw_test$p.value > 0.05) {
cat(" Keputusan: Tidak terdapat autokorelasi\n")
} else {
cat(" Keputusan: Terdapat autokorelasi\n")
}
## Keputusan: Tidak terdapat autokorelasi
# INTERPRETASI KOEFISIEN
cat("\n INTERPRETASI KOEFISIEN \n")
##
## INTERPRETASI KOEFISIEN
intercept <- coef(model)[1]
slope <- coef(model)[2]
cat("Persamaan Regresi: reviews.count =",
round(intercept, 2), "+",
round(slope, 2), "* price\n")
## Persamaan Regresi: reviews.count = 127741.8 + -1345.54 * price
cat("\nInterpretasi:\n")
##
## Interpretasi:
cat("1. Intercept (β0 =", round(intercept, 2), "):\n")
## 1. Intercept (β0 = 127741.8 ):
cat(" Perkiraan jumlah reviews.count ketika price = 0 adalah",
round(intercept, 2), "\n")
## Perkiraan jumlah reviews.count ketika price = 0 adalah 127741.8
cat("2. Slope (β1 =", round(slope, 2), "):\n")
## 2. Slope (β1 = -1345.54 ):
cat(" Setiap kenaikan 1 satuan price akan mengubah reviews.count sebesar",
round(slope, 2), "\n")
## Setiap kenaikan 1 satuan price akan mengubah reviews.count sebesar -1345.54
# ESTIMASI PARAMETER DAN INFERENSI
cat("\n ESTIMASI PARAMETER \n")
##
## ESTIMASI PARAMETER
conf_int <- confint(model, level = 0.95)
cat("Interval Kepercayaan 95%:\n")
## Interval Kepercayaan 95%:
cat(" Intercept: [", round(conf_int[1,1], 3), ", ",
round(conf_int[1,2], 3), "]\n", sep = "")
## Intercept: [82352.39, 173131.1]
cat(" Slope: [", round(conf_int[2,1], 3), ", ",
round(conf_int[2,2], 3), "]\n", sep = "")
## Slope: [-4556.43, 1865.357]
# Uji hipotesis untuk slope
cat("\nUji Hipotesis untuk Slope (β1):\n")
##
## Uji Hipotesis untuk Slope (β1):
cat(" H0: β1 = 0 (tidak ada hubungan linear antara price dan reviews.count)\n")
## H0: β1 = 0 (tidak ada hubungan linear antara price dan reviews.count)
cat(" H1: β1 ≠ 0 (ada hubungan linear antara price dan reviews.count)\n")
## H1: β1 ≠ 0 (ada hubungan linear antara price dan reviews.count)
summary_model <- summary(model)
slope_pvalue <- summary_model$coefficients[2, 4]
cat(" p-value =", round(slope_pvalue, 6), "\n")
## p-value = 0.407658
if(slope_pvalue < 0.05) {
cat(" Keputusan: Tolak H0, terdapat hubungan linear yang signifikan\n")
} else {
cat(" Keputusan: Gagal tolak H0, tidak terdapat hubungan linear yang signifikan\n")
}
## Keputusan: Gagal tolak H0, tidak terdapat hubungan linear yang signifikan
# KOEFISIEN DETERMINASI
r_squared <- summary_model$r.squared
cat("\nKoefisien Determinasi (R²):\n")
##
## Koefisien Determinasi (R²):
cat(" R² =", round(r_squared, 4), "\n")
## R² = 0.007
cat(" Artinya:", round(r_squared * 100, 2),
"% variasi reviews.count dapat dijelaskan oleh price\n")
## Artinya: 0.7 % variasi reviews.count dapat dijelaskan oleh price
# VISUALISASI MODEL
ggplot(data, aes(x = price, y = reviews.count)) +
geom_point(color = "blue", size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "pink") +
labs(title = "Regresi Linear: Price vs Reviews Count",
subtitle = paste("Y =", round(intercept, 2),
"+", round(slope, 2), "X"),
x = "Price",
y = "Reviews Count") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# PREDIKSI
new_data <- data.frame(price = c(10, 20)) # ganti sesuai harga yang ingin diprediksi
prediction <- predict(model, newdata = new_data, interval = "confidence")
cat("\n=== PREDIKSI ===\n")
##
## === PREDIKSI ===
cat("Untuk price =", new_data$price[1],
", prediksi reviews.count =", round(prediction[1, "fit"], 2), "\n")
## Untuk price = 10 , prediksi reviews.count = 114286.4
cat("Untuk price =", new_data$price[2],
", prediksi reviews.count =", round(prediction[2, "fit"], 2), "\n")
## Untuk price = 20 , prediksi reviews.count = 100831
# DIAGNOSTIC PLOTS
par(mfrow = c(2, 2))
plot(model, which = 1:4)
par(mfrow = c(1, 1))
cat("\n RINGKASAN ANALISIS \n")
##
## RINGKASAN ANALISIS
cat("1. Model: reviews.count = β0 + β1 * price + ε\n")
## 1. Model: reviews.count = β0 + β1 * price + ε
cat("2. Estimasi: Y =",
round(intercept, 3), "+",
round(slope, 3), "* price\n")
## 2. Estimasi: Y = 127741.8 + -1345.536 * price
cat("3. R² =", round(r_squared, 4),
"(", round(r_squared * 100, 1), "%)\n")
## 3. R² = 0.007 ( 0.7 %)
# Uji F (ambil p-value yang benar)
f_stat <- summary_model$fstatistic
f_pvalue <- pf(f_stat[1], f_stat[2], f_stat[3], lower.tail = FALSE)
cat("4. Uji F (model): p-value =",
round(f_pvalue, 6), "\n")
## 4. Uji F (model): p-value = 0.407658
cat("5. Asumsi:\n")
## 5. Asumsi:
cat(" - Normalitas: p =", round(shapiro_test$p.value, 4), "\n")
## - Normalitas: p = 0
cat(" - Homoskedastisitas: p =", round(bp_test$p.value, 4), "\n")
## - Homoskedastisitas: p = 0.4724
cat(" - Autokorelasi: p =", round(dw_test$p.value, 4), "\n")
## - Autokorelasi: p = 0.105
hasil <- list(
model = model,
coefficients = coef(model),
r_squared = r_squared,
f_pvalue = f_pvalue,
assumptions = list(
normality = shapiro_test$p.value,
homoscedasticity = bp_test$p.value,
autocorrelation = dw_test$p.value
),
confidence_intervals = conf_int
)
cat("\nAnalisis regresi linear sederhana selesai!\n")
##
## Analisis regresi linear sederhana selesai!