#install.packages("brant")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(brant)
## Warning: package 'brant' was built under R version 4.5.3
library(nnet)
# Load the raw e-commerce data. check.names = FALSE keeps the CSV header text
# untouched so that it can be normalised explicitly below.
df <- read.csv("Ecommerce_DBS.csv", check.names = FALSE)
# Normalise headers to lower snake_case. Surrounding whitespace is trimmed
# BEFORE inner spaces become underscores; in the opposite order a padded
# header such as " NPS " would become "_nps_" and trimws() would have nothing
# left to remove.
colnames(df) <- tolower(gsub(" ", "_", trimws(colnames(df))))
# The source file misspells the longitude header; rename it.
colnames(df)[colnames(df) == "longituide"] <- "longitude"
# Derive the NPS customer segment as an ordered factor:
#   nps <= 6 -> Detractor, 6 < nps <= 8 -> Passive, nps > 8 -> Promoter.
# cut() with right-closed intervals reproduces these thresholds and leaves
# missing scores as NA.
df$customer_category <- cut(
  df$nps,
  breaks = c(-Inf, 6, 8, Inf),
  labels = c("Detractor", "Passive", "Promoter"),
  include.lowest = TRUE,
  ordered_result = TRUE
)
# Remove the raw score together with the id / state / country columns.
df <- dplyr::select(df, -nps, -customer_id, -state, -country)
# Parse the day-month-year purchase date, keep only its month and
# day-of-month as numeric columns, then discard the date column itself.
df <- df %>%
  dplyr::mutate(
    purchase_date = lubridate::dmy(purchase_date),
    month = lubridate::month(purchase_date),
    day = lubridate::day(purchase_date)
  ) %>%
  dplyr::select(-purchase_date)
# Convert the categorical predictors to factors.
cat_cols <- c("product_category", "gender", "source")
df <- df %>%
  dplyr::mutate(dplyr::across(dplyr::all_of(cat_cols), as.factor))
# Standardise the continuous predictors to mean 0 / sd 1 (the counterpart of
# scikit-learn's StandardScaler; note that scale() divides by the sample sd,
# i.e. uses n - 1).
num_cols <- c(
  "product_price", "quantity", "total_purchase_amount",
  "customer_age", "latitude", "longitude"
)
# Turn Inf / -Inf / NaN into NA first: na.omit() does not drop infinite
# values, and a single infinite entry would make scale() return NaN for the
# whole column.
df[num_cols] <- lapply(df[num_cols], function(x) {
  x[!is.finite(x)] <- NA
  x
})
df[num_cols] <- scale(df[num_cols])
# Drop every row that still has a missing value in any column.
df <- na.omit(df)
# Fit the ordinal regression with polr() (loaded from MASS; logistic link by
# default), using every remaining column of df as a predictor. Hess = TRUE
# stores the Hessian so summary() can report standard errors without
# refitting the model.
model_ordinal <- polr(customer_category ~ ., data = df, Hess = TRUE)
summary(model_ordinal)
## Call:
## polr(formula = customer_category ~ ., data = df, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## product_categoryClothing -5.428e-03 0.0104753 -0.51815
## product_categoryElectronics -2.331e-02 0.0117155 -1.98944
## product_categoryHome 1.183e-02 0.0117056 1.01097
## product_price -1.291e-03 0.0040569 -0.31833
## quantity -1.264e-03 0.0040584 -0.31156
## total_purchase_amount -2.588e-03 0.0040618 -0.63715
## customer_age -2.280e-03 0.0040618 -0.56144
## genderMale 4.552e-04 0.0081136 0.05610
## sourceInstagram Campign 3.609e-03 0.0109657 0.32915
## sourceOrganic Search -5.253e-03 0.0122940 -0.42724
## sourceSEM -6.933e-04 0.0111030 -0.06244
## latitude 4.076e-04 0.0044002 0.09264
## longitude -6.186e-03 0.0044093 -1.40282
## month 3.315e-05 0.0012098 0.02740
## day 1.220e-03 0.0004614 2.64471
##
## Intercepts:
## Value Std. Error t value
## Detractor|Passive 0.5790 0.0150 38.5219
## Passive|Promoter 1.5182 0.0153 98.9073
##
## Residual Deviance: 453167.70
## AIC: 453201.70
# Multicollinearity check via (generalised) variance inflation factors.
# Rule of thumb applied here: the assumption holds when VIF < 10
# (ideally < 5).
cat("\n--- UJI MULTIKOLINEARITAS ---\n")
##
## --- UJI MULTIKOLINEARITAS ---
vif_values <- car::vif(model_ordinal)
print(vif_values)
## GVIF Df GVIF^(1/(2*Df))
## product_category 1.000203 3 1.000034
## product_price 1.000071 1 1.000035
## quantity 1.000090 1 1.000045
## total_purchase_amount 1.002695 1 1.001347
## customer_age 1.002950 1 1.001474
## gender 1.000050 1 1.000025
## source 1.005186 3 1.000862
## latitude 1.177144 1 1.084963
## longitude 1.176746 1 1.084779
## month 1.000053 1 1.000026
## day 1.000085 1 1.000042
# Count the extreme values in a numeric vector.
#
# x          numeric vector; the caller passes standardised columns, so the
#            values are compared on the z-score scale.
# threshold  absolute cut-off above which a value counts as extreme.
#            Defaults to 3, the cut-off that used to be hard-coded.
#
# Returns a single integer. NA entries are ignored.
check_outliers <- function(x, threshold = 3) {
  sum(abs(x) > threshold, na.rm = TRUE)
}
# Number of extreme observations in each standardised numeric column.
# vapply() pins the result to exactly one integer per column, whereas
# sapply() would silently return a list if num_cols selected no columns.
outliers_count <- vapply(df[num_cols], check_outliers, integer(1))
print(outliers_count)
## product_price quantity total_purchase_amount
## 0 0 0
## customer_age latitude longitude
## 0 0 7480
# Brant test of the proportional-odds (parallel regression) assumption.
# H0: the assumption holds, so large p-values are the desired outcome.
# The header ends with "\n" so that, when run outside knitr, the table that
# brant() prints does not start on the same line as the header.
cat("UJI PROPORTIONAL ODDS (BRANT)\n")
## UJI PROPORTIONAL ODDS (BRANT)
brant_result <- brant(model_ordinal)
## ------------------------------------------------------------
## Test for X2 df probability
## ------------------------------------------------------------
## Omnibus 14.96 15 0.45
## product_categoryClothing 0 1 0.96
## product_categoryElectronics 2.13 1 0.14
## product_categoryHome 0.11 1 0.74
## product_price 0.08 1 0.78
## quantity 2.4 1 0.12
## total_purchase_amount 0.91 1 0.34
## customer_age 0 1 0.98
## genderMale 1.58 1 0.21
## sourceInstagram Campign 0.99 1 0.32
## sourceOrganic Search 0.19 1 0.67
## sourceSEM 1.8 1 0.18
## latitude 1.57 1 0.21
## longitude 0.86 1 0.35
## month 0.08 1 0.77
## day 2.82 1 0.09
## ------------------------------------------------------------
##
## H0: Parallel Regression Assumption holds
# brant() already printed a rounded table while it ran; this prints the
# returned object (X2, df, probability per term) without that rounding.
print(brant_result)
## X2 df probability
## Omnibus 1.495763e+01 15 0.45447383
## product_categoryClothing 2.228098e-03 1 0.96235165
## product_categoryElectronics 2.130014e+00 1 0.14443947
## product_categoryHome 1.083105e-01 1 0.74207590
## product_price 7.703925e-02 1 0.78135057
## quantity 2.401493e+00 1 0.12121948
## total_purchase_amount 9.132271e-01 1 0.33925963
## customer_age 3.652649e-04 1 0.98475183
## genderMale 1.579978e+00 1 0.20876387
## sourceInstagram Campign 9.875503e-01 1 0.32034183
## sourceOrganic Search 1.866618e-01 1 0.66570963
## sourceSEM 1.801753e+00 1 0.17950072
## latitude 1.570828e+00 1 0.21008686
## longitude 8.623676e-01 1 0.35307698
## month 8.314654e-02 1 0.77307775
## day 2.821358e+00 1 0.09301766
# Linearity-of-the-logit check (Box-Tidwell style).
# For each continuous predictor x an extra term x * log(|x| + 1) is added to
# the model; a significant coefficient on that term suggests x is not linear
# in the logit.
# NOTE(review): the classical Box-Tidwell term is x * log(x) on strictly
# positive x; the |x| + 1 variant is presumably used because the predictors
# were standardised and can be negative. The check is also fitted with
# multinom() rather than the polr() model used elsewhere in this script.
# Confirm both choices are intended.
cat("UJI LINEARITAS LOGIT (BOX-TIDWELL)\n")
## UJI LINEARITAS LOGIT (BOX-TIDWELL)
df_boxtidwell <- df
# Names of the added terms; reused below to pick their p-values exactly
# instead of pattern-matching coefficient names.
log_vars <- paste0(num_cols, "_log")
df_boxtidwell[log_vars] <- lapply(
  df[num_cols],
  function(x) x * log(abs(x) + 1)
)
model_box <- multinom(customer_category ~ ., data = df_boxtidwell, trace = FALSE)
# summary() on a multinom fit recomputes the Hessian on every call, so run it
# once and reuse the result.
box_summary <- summary(model_box)
z_stats <- box_summary$coefficients / box_summary$standard.errors
# Two-sided Wald p-values. The upper-tail form stays accurate for large |z|,
# where 1 - pnorm() would round to exactly 0.
p_values <- 2 * pnorm(abs(z_stats), lower.tail = FALSE)
# drop = FALSE keeps the matrix layout even if only one term is selected.
print(p_values[, log_vars, drop = FALSE])
## product_price_log quantity_log total_purchase_amount_log
## Passive 0.3808863 0.5250455 0.5548015
## Promoter 0.1122128 0.1024917 0.7396634
## customer_age_log latitude_log longitude_log
## Passive 0.9779159 0.16529785 0.8951178
## Promoter 0.5841280 0.06957204 0.2704104