#install.packages("brant")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(MASS)      
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)       
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(brant)     
## Warning: package 'brant' was built under R version 4.5.3
library(nnet)
# Load the raw data. check.names = FALSE keeps the header text exactly as it
# appears in the CSV so it can be normalised explicitly below.
df <- read.csv("Ecommerce_DBS.csv", check.names = FALSE)
# Normalise headers to lower snake_case. Surrounding whitespace is trimmed
# BEFORE inner spaces become underscores: in the reverse order a padded header
# such as " Name " turns into "_name_" and trimws() has nothing left to strip.
colnames(df) <- tolower(gsub(" ", "_", trimws(colnames(df))))
# Rename the misspelled "longituide" header, if present, to "longitude".
colnames(df)[colnames(df) == "longituide"] <- "longitude"
# Derive the ordinal response from the NPS score:
#   nps <= 6      -> "Detractor"
#   6 < nps <= 8  -> "Passive"
#   nps > 8       -> "Promoter"
# cut() builds right-closed intervals by default; include.lowest = TRUE closes
# the first interval on the left as well. ordered_result = TRUE yields an
# ordered factor (Detractor < Passive < Promoter). Missing scores stay NA.
df$customer_category <- cut(
  df$nps,
  breaks = c(-Inf, 6, 8, Inf),
  labels = c("Detractor", "Passive", "Promoter"),
  include.lowest = TRUE,
  ordered_result = TRUE
)
# Remove nps (the response was derived from it) together with customer_id,
# state and country. select() is namespace-qualified because MASS masks
# dplyr::select once both packages are attached.
df <- dplyr::select(df, -c(nps, customer_id, state, country))
# Parse the purchase date (day-month-year text) and keep only its calendar
# month and day-of-month as new columns; the date column itself is dropped
# once both parts have been extracted.
df <- df %>%
  dplyr::mutate(
    purchase_date = lubridate::dmy(purchase_date),
    month = lubridate::month(purchase_date),
    day = lubridate::day(purchase_date)
  ) %>%
  dplyr::select(-purchase_date)
# Encode the categorical predictors as factors.
cat_cols <- c("product_category", "gender", "source")
df <- dplyr::mutate(df, dplyr::across(dplyr::all_of(cat_cols), as.factor))
# Standardise the numeric predictors column by column (centre on the mean,
# divide by the standard deviation); intended as the counterpart of
# StandardScaler.
# NOTE(review): scale() uses the n - 1 denominator, so values may differ
# marginally from a scaler that divides by n.
num_cols <- c(
  "product_price", "quantity", "total_purchase_amount",
  "customer_age", "latitude", "longitude"
)
df[num_cols] <- lapply(df[num_cols], function(x) as.numeric(scale(x)))
# Drop every row that still contains a missing value.
# NOTE(review): na.omit() removes NA/NaN only; infinite values are not
# filtered by this step.
df <- stats::na.omit(df)
# Fit the ordinal regression of customer_category on every remaining column
# of df. Hess = TRUE keeps the Hessian in the fit so that summary() can
# report standard errors.
model_ordinal <- polr(customer_category ~ ., data = df, Hess = TRUE)
# Coefficient table, cut-point intercepts, residual deviance and AIC.
summary(model_ordinal)
## Call:
## polr(formula = customer_category ~ ., data = df, Hess = TRUE)
## 
## Coefficients:
##                                  Value Std. Error  t value
## product_categoryClothing    -5.428e-03  0.0104753 -0.51815
## product_categoryElectronics -2.331e-02  0.0117155 -1.98944
## product_categoryHome         1.183e-02  0.0117056  1.01097
## product_price               -1.291e-03  0.0040569 -0.31833
## quantity                    -1.264e-03  0.0040584 -0.31156
## total_purchase_amount       -2.588e-03  0.0040618 -0.63715
## customer_age                -2.280e-03  0.0040618 -0.56144
## genderMale                   4.552e-04  0.0081136  0.05610
## sourceInstagram Campign      3.609e-03  0.0109657  0.32915
## sourceOrganic Search        -5.253e-03  0.0122940 -0.42724
## sourceSEM                   -6.933e-04  0.0111030 -0.06244
## latitude                     4.076e-04  0.0044002  0.09264
## longitude                   -6.186e-03  0.0044093 -1.40282
## month                        3.315e-05  0.0012098  0.02740
## day                          1.220e-03  0.0004614  2.64471
## 
## Intercepts:
##                   Value   Std. Error t value
## Detractor|Passive  0.5790  0.0150    38.5219
## Passive|Promoter   1.5182  0.0153    98.9073
## 
## Residual Deviance: 453167.70 
## AIC: 453201.70
# Multicollinearity check: the assumption is considered met when VIF < 10
# (ideally < 5).
cat("\n--- UJI MULTIKOLINEARITAS ---\n")
## 
## --- UJI MULTIKOLINEARITAS ---
# vif() is presumably car::vif (car is attached at the top of the script).
# The result is kept in vif_values and printed; for this model it is a table
# with GVIF, Df and GVIF^(1/(2*Df)) columns, one row per model term.
vif_values <- vif(model_ordinal)
print(vif_values)
##                           GVIF Df GVIF^(1/(2*Df))
## product_category      1.000203  3        1.000034
## product_price         1.000071  1        1.000035
## quantity              1.000090  1        1.000045
## total_purchase_amount 1.002695  1        1.001347
## customer_age          1.002950  1        1.001474
## gender                1.000050  1        1.000025
## source                1.005186  3        1.000862
## latitude              1.177144  1        1.084963
## longitude             1.176746  1        1.084779
## month                 1.000053  1        1.000026
## day                   1.000085  1        1.000042
# Count the extreme values in a numeric vector.
#
# x:         numeric vector; the script passes standardised columns, so the
#            cut-off is expressed in standard deviations there.
# threshold: absolute value above which an element counts as extreme
#            (default 3, the cut-off previously hard-coded).
# Returns the integer number of elements with abs(x) > threshold; missing
# values are ignored.
check_outliers <- function(x, threshold = 3) {
  sum(abs(x) > threshold, na.rm = TRUE)
}
# Count the extreme values in every continuous column. vapply() pins the
# result to exactly one integer per column, so an unexpected return shape
# fails loudly instead of silently changing the result type as sapply() can.
outliers_count <- vapply(df[num_cols], check_outliers, integer(1))
print(outliers_count)
##         product_price              quantity total_purchase_amount 
##                     0                     0                     0 
##          customer_age              latitude             longitude 
##                     0                     0                  7480
# Brant test of the proportional-odds (parallel regression) assumption.
# The heading ends with a newline so that, when the script runs outside a
# knitted report, it does not fuse with the first line of the table that
# brant() prints.
cat("UJI PROPORTIONAL ODDS (BRANT)\n")
## UJI PROPORTIONAL ODDS (BRANT)
# brant() prints a rounded table itself; the returned values are kept in
# brant_result so they can be printed at full precision afterwards.
brant_result <- brant(model_ordinal)
## ------------------------------------------------------------ 
## Test for         X2  df  probability 
## ------------------------------------------------------------ 
## Omnibus              14.96   15  0.45
## product_categoryClothing 0   1   0.96
## product_categoryElectronics  2.13    1   0.14
## product_categoryHome     0.11    1   0.74
## product_price            0.08    1   0.78
## quantity         2.4 1   0.12
## total_purchase_amount        0.91    1   0.34
## customer_age         0   1   0.98
## genderMale           1.58    1   0.21
## sourceInstagram Campign  0.99    1   0.32
## sourceOrganic Search     0.19    1   0.67
## sourceSEM            1.8 1   0.18
## latitude         1.57    1   0.21
## longitude            0.86    1   0.35
## month                0.08    1   0.77
## day              2.82    1   0.09
## ------------------------------------------------------------ 
## 
## H0: Parallel Regression Assumption holds
# Show the unrounded X2 statistics, degrees of freedom and p-values that
# brant() returned (the table brant() prints itself is rounded).
print(brant_result)
##                                       X2 df probability
## Omnibus                     1.495763e+01 15  0.45447383
## product_categoryClothing    2.228098e-03  1  0.96235165
## product_categoryElectronics 2.130014e+00  1  0.14443947
## product_categoryHome        1.083105e-01  1  0.74207590
## product_price               7.703925e-02  1  0.78135057
## quantity                    2.401493e+00  1  0.12121948
## total_purchase_amount       9.132271e-01  1  0.33925963
## customer_age                3.652649e-04  1  0.98475183
## genderMale                  1.579978e+00  1  0.20876387
## sourceInstagram Campign     9.875503e-01  1  0.32034183
## sourceOrganic Search        1.866618e-01  1  0.66570963
## sourceSEM                   1.801753e+00  1  0.17950072
## latitude                    1.570828e+00  1  0.21008686
## longitude                   8.623676e-01  1  0.35307698
## month                       8.314654e-02  1  0.77307775
## day                         2.821358e+00  1  0.09301766
# Linearity-of-the-logit check in the spirit of Box-Tidwell: append an
# x * log(.) term for every continuous predictor, refit, and inspect the
# p-values of the appended terms.
# The heading ends with a newline so later console output starts on its own
# line when the script runs outside a knitted report.
cat("UJI LINEARITAS LOGIT (BOX-TIDWELL)\n")
## UJI LINEARITAS LOGIT (BOX-TIDWELL)
df_boxtidwell <- df
# log(abs(x) + 1) keeps the logarithm defined for zero and negative values.
# NOTE(review): the textbook Box-Tidwell term is x * log(x) on strictly
# positive x; confirm this variant on the standardised columns is intended.
for (col in num_cols) {
  x <- df_boxtidwell[[col]]
  df_boxtidwell[[paste0(col, "_log")]] <- x * log(abs(x) + 1)
}
# NOTE(review): the auxiliary fit is a multinomial model (multinom), not the
# ordinal polr model checked elsewhere in this script - confirm.
model_box <- multinom(
  customer_category ~ .,
  data = df_boxtidwell,
  trace = FALSE
)
# Summarise once and reuse the result instead of calling summary() twice.
model_box_summary <- summary(model_box)
z_stats <- model_box_summary$coefficients / model_box_summary$standard.errors
# Two-sided Wald p-values. Taking the upper tail directly avoids the
# cancellation in 1 - pnorm(), which rounds to exactly 0 for large |z|.
p_values <- 2 * pnorm(abs(z_stats), lower.tail = FALSE)

# Report only the appended "<name>_log" terms; the anchored pattern cannot
# match a column that merely contains "_log", and drop = FALSE keeps the
# result a matrix even if a single term is selected.
log_vars <- grep("_log$", colnames(p_values), value = TRUE)
print(p_values[, log_vars, drop = FALSE])
##          product_price_log quantity_log total_purchase_amount_log
## Passive          0.3808863    0.5250455                 0.5548015
## Promoter         0.1122128    0.1024917                 0.7396634
##          customer_age_log latitude_log longitude_log
## Passive         0.9779159   0.16529785     0.8951178
## Promoter        0.5841280   0.06957204     0.2704104