Multinomial

#install.packages("brant")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(car)

## Warning: package 'car' was built under R version 4.5.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.5.3

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(brant)

## Warning: package 'brant' was built under R version 4.5.3

library(nnet)

# Load the raw e-commerce data. check.names = FALSE keeps the CSV headers
# exactly as written so they can be normalised explicitly below.
df <- read.csv("Ecommerce_DBS.csv", check.names = FALSE)

# Normalise headers to lower snake_case. Padding is trimmed BEFORE spaces are
# replaced: in the reverse order, leading/trailing blanks would already have
# become underscores and trimws() could no longer strip them.
colnames(df) <- tolower(gsub(" ", "_", trimws(colnames(df)), fixed = TRUE))

# Correct the misspelled "longituide" header when it is present.
colnames(df)[colnames(df) == "longituide"] <- "longitude"

# Bucket the NPS score into an ordered factor
# (Detractor < Passive < Promoter):
#   nps <= 6      -> Detractor
#   6 < nps <= 8  -> Passive
#   nps > 8       -> Promoter
# Missing scores stay NA.
df$customer_category <- cut(
  df$nps,
  breaks = c(-Inf, 6, 8, Inf),
  labels = c("Detractor", "Passive", "Promoter"),
  right = TRUE,
  include.lowest = TRUE,
  ordered_result = TRUE
)

# Remove the raw score (now encoded in customer_category) together with the
# identifier and state/country columns so they do not enter the `~ .` formula.
df <- dplyr::select(df, -c(nps, customer_id, state, country))

# Parse purchase_date as day-month-year, derive numeric month and
# day-of-month columns from it, then drop the date column itself.
df <- df %>%
  dplyr::mutate(
    purchase_date = lubridate::dmy(purchase_date),
    month = lubridate::month(purchase_date),
    day = lubridate::day(purchase_date)
  ) %>%
  dplyr::select(-purchase_date)

# Convert the categorical predictors to factors.
cat_cols <- c("product_category", "gender", "source")
df <- df %>%
  dplyr::mutate(dplyr::across(dplyr::all_of(cat_cols), as.factor))

# Standardise the numeric predictors to zero mean and unit standard deviation
# (the counterpart of a StandardScaler), one column at a time.
num_cols <- c(
  "product_price", "quantity", "total_purchase_amount",
  "customer_age", "latitude", "longitude"
)
df[num_cols] <- lapply(
  df[num_cols],
  function(column) as.vector(scale(column))
)

# Drop every row that contains a missing value. Note that na.omit() removes
# NA/NaN only; it does not filter infinite values.
df <- na.omit(df)

# Fit a proportional-odds (ordinal logistic) model of customer_category on
# every remaining column of df. Hess = TRUE stores the Hessian, which
# summary() needs for the standard errors reported below.
model_ordinal <- polr(customer_category ~ ., data = df, Hess = TRUE)
summary(model_ordinal)

## Call:
## polr(formula = customer_category ~ ., data = df, Hess = TRUE)
## 
## Coefficients:
##                                  Value Std. Error  t value
## product_categoryClothing    -5.428e-03  0.0104753 -0.51815
## product_categoryElectronics -2.331e-02  0.0117155 -1.98944
## product_categoryHome         1.183e-02  0.0117056  1.01097
## product_price               -1.291e-03  0.0040569 -0.31833
## quantity                    -1.264e-03  0.0040584 -0.31156
## total_purchase_amount       -2.588e-03  0.0040618 -0.63715
## customer_age                -2.280e-03  0.0040618 -0.56144
## genderMale                   4.552e-04  0.0081136  0.05610
## sourceInstagram Campign      3.609e-03  0.0109657  0.32915
## sourceOrganic Search        -5.253e-03  0.0122940 -0.42724
## sourceSEM                   -6.933e-04  0.0111030 -0.06244
## latitude                     4.076e-04  0.0044002  0.09264
## longitude                   -6.186e-03  0.0044093 -1.40282
## month                        3.315e-05  0.0012098  0.02740
## day                          1.220e-03  0.0004614  2.64471
## 
## Intercepts:
##                   Value   Std. Error t value
## Detractor|Passive  0.5790  0.0150    38.5219
## Passive|Promoter   1.5182  0.0153    98.9073
## 
## Residual Deviance: 453167.70 
## AIC: 453201.70

# Multicollinearity check: the assumption is treated as met when VIF < 10
# (ideally < 5). The banner below is printed surrounded by newlines.
cat("\n--- UJI MULTIKOLINEARITAS ---\n")

## 
## --- UJI MULTIKOLINEARITAS ---

# Variance inflation factors for the fitted ordinal model; terms with more
# than one degree of freedom are reported as GVIF and GVIF^(1/(2*Df)).
vif_values <- car::vif(model_ordinal)
print(vif_values)

##                           GVIF Df GVIF^(1/(2*Df))
## product_category      1.000203  3        1.000034
## product_price         1.000071  1        1.000035
## quantity              1.000090  1        1.000045
## total_purchase_amount 1.002695  1        1.001347
## customer_age          1.002950  1        1.001474
## gender                1.000050  1        1.000025
## source                1.005186  3        1.000862
## latitude              1.177144  1        1.084963
## longitude             1.176746  1        1.084779
## month                 1.000053  1        1.000026
## day                   1.000085  1        1.000042

# Count the extreme values in a numeric vector.
#
# x         Numeric vector; in this script it holds standardised values, so
#           the magnitude can be read as a z-score.
# threshold Absolute cutoff above which a value counts as extreme. Defaults
#           to 3, the value that was previously hard-coded.
#
# Returns a single integer: the number of non-missing elements whose absolute
# value is strictly greater than `threshold`.
check_outliers <- function(x, threshold = 3) {
  stopifnot(is.numeric(threshold), length(threshold) == 1L)
  sum(abs(x) > threshold, na.rm = TRUE)
}
# Number of extreme values per standardised numeric column. vapply() pins the
# result to exactly one integer per column, whereas sapply() would silently
# change its return type on empty or unexpected input.
outliers_count <- vapply(df[num_cols], check_outliers, integer(1))
print(outliers_count)

##         product_price              quantity total_purchase_amount 
##                     0                     0                     0 
##          customer_age              latitude             longitude 
##                     0                     0                  7480

# Section banner for the Brant proportional-odds test (printed without a
# trailing newline).
cat("UJI PROPORTIONAL ODDS (BRANT)")

## UJI PROPORTIONAL ODDS (BRANT)

# Brant test of the parallel-regression (proportional odds) assumption for
# the fitted ordinal model. H0: the assumption holds, so large p-values do
# not reject it. The call prints a rounded table and returns the statistics.
brant_result <- brant::brant(model_ordinal)

## ------------------------------------------------------------ 
## Test for         X2  df  probability 
## ------------------------------------------------------------ 
## Omnibus              14.96   15  0.45
## product_categoryClothing 0   1   0.96
## product_categoryElectronics  2.13    1   0.14
## product_categoryHome     0.11    1   0.74
## product_price            0.08    1   0.78
## quantity         2.4 1   0.12
## total_purchase_amount        0.91    1   0.34
## customer_age         0   1   0.98
## genderMale           1.58    1   0.21
## sourceInstagram Campign  0.99    1   0.32
## sourceOrganic Search     0.19    1   0.67
## sourceSEM            1.8 1   0.18
## latitude         1.57    1   0.21
## longitude            0.86    1   0.35
## month                0.08    1   0.77
## day              2.82    1   0.09
## ------------------------------------------------------------ 
## 
## H0: Parallel Regression Assumption holds

# Show the unrounded Brant statistics (X2, df, probability) for the omnibus
# test and for each model term.
print(brant_result)

##                                       X2 df probability
## Omnibus                     1.495763e+01 15  0.45447383
## product_categoryClothing    2.228098e-03  1  0.96235165
## product_categoryElectronics 2.130014e+00  1  0.14443947
## product_categoryHome        1.083105e-01  1  0.74207590
## product_price               7.703925e-02  1  0.78135057
## quantity                    2.401493e+00  1  0.12121948
## total_purchase_amount       9.132271e-01  1  0.33925963
## customer_age                3.652649e-04  1  0.98475183
## genderMale                  1.579978e+00  1  0.20876387
## sourceInstagram Campign     9.875503e-01  1  0.32034183
## sourceOrganic Search        1.866618e-01  1  0.66570963
## sourceSEM                   1.801753e+00  1  0.17950072
## latitude                    1.570828e+00  1  0.21008686
## longitude                   8.623676e-01  1  0.35307698
## month                       8.314654e-02  1  0.77307775
## day                         2.821358e+00  1  0.09301766

# Section banner for the linearity-of-the-logit (Box-Tidwell style) check
# (printed without a trailing newline).
cat("UJI LINEARITAS LOGIT (BOX-TIDWELL)")

## UJI LINEARITAS LOGIT (BOX-TIDWELL)

# Linearity-of-the-logit check in the spirit of Box-Tidwell: for each numeric
# predictor x, add the term x * log(|x| + 1) and test whether it is
# significant. A non-significant term gives no evidence of non-linearity.
#
# NOTE(review): the textbook Box-Tidwell term is x * log(x) on positive,
# unstandardised predictors, fitted with the same model family as the main
# model. Here the inputs are already standardised (hence |x| + 1) and the
# auxiliary model is multinom() rather than polr() - confirm this is intended.
log_vars <- paste0(num_cols, "_log")
df_boxtidwell <- df
df_boxtidwell[log_vars] <- lapply(
  df[num_cols],
  function(x) x * log(abs(x) + 1)
)

model_box <- multinom(
  customer_category ~ .,
  data = df_boxtidwell,
  trace = FALSE
)

# summary() on a multinom fit is expensive, so compute it once and reuse it
# for both the coefficients and their standard errors.
box_summary <- summary(model_box)
z_stats <- box_summary$coefficients / box_summary$standard.errors

# Two-sided Wald p-values; the upper tail is requested directly so that very
# small p-values are not rounded to zero by the subtraction 1 - pnorm().
p_values <- 2 * pnorm(abs(z_stats), lower.tail = FALSE)

# Report only the added log-interaction terms (one row per non-reference
# outcome level).
print(p_values[, log_vars])

##          product_price_log quantity_log total_purchase_amount_log
## Passive          0.3808863    0.5250455                 0.5548015
## Promoter         0.1122128    0.1024917                 0.7396634
##          customer_age_log latitude_log longitude_log
## Passive         0.9779159   0.16529785     0.8951178
## Promoter        0.5841280   0.06957204     0.2704104

Multinomial

Sandi Ardi Prayitno (24031554037), Chaesar Giveson (24031554058), Nagatan Alief Putra Silahen (24031554086)

2026-04-25